DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 01/20] common/cnxk: implement SSO HW info
@ 2024-10-03 13:22 pbhagavatula
  2024-10-03 13:22 ` [PATCH 02/20] event/cnxk: add CN20K specific device probe pbhagavatula
                   ` (20 more replies)
  0 siblings, 21 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Ankur Dwivedi, Anoob Joseph,
	Tejasree Kondoj, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: series-33204 ("add Marvell cn20k SOC support for mempool and net")

 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  7 +--
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 35 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };

+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 7d45b06dda..ff76565af9 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}

-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;

 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }

+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info\n");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }

+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;

-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }

@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}

-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}

 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}

+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"

 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256

 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);

+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 6f8a2e02da..315596217c 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -515,6 +515,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 780785d656..c67940a791 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@

 #include <ethdev_driver.h>

-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index f443cb9563..e180b7e33d 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>

-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4e2968f91e..a1aa4d0bfd 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;

 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..cfc0f94eca 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index d59769717e..a8b749f9c9 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -80,8 +82,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
-	       ROC_SSO_XAE_PER_XAQ;
+	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) * ws->xae_waes;
 }

 static inline void
@@ -398,7 +399,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;

 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 4b2d6bffa6..4c5b2f7f59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */

-#include "roc_npa.h"
+#include "roc_api.h"

 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;

 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ece49394e7..811b2c5ba0 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@

 #include "cnxk_eventdev_dp.h"

-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"

 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a371e44300 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>

+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"

diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 6d59fdf909..f8753b29ad 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@

 #include <math.h>

-#include "roc_npa.h"
+#include "roc_api.h"

 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..5dcf6085dc 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"

diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>

-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 struct pwords {
 	uint64_t u[5];
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 02/20] event/cnxk: add CN20K specific device probe
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-15 16:17   ` Stephen Hemminger
  2024-10-03 13:22 ` [PATCH 03/20] event/cnxk: add CN20K device config pbhagavatula
                   ` (19 subsequent siblings)
  20 siblings, 1 reply; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 30 +++++----
 doc/guides/rel_notes/release_24_11.rst |  4 ++
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 128 insertions(+), 17 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index d038930594..3b79efc7f8 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -78,10 +79,11 @@ Runtime Config Options
 
     -a 0002:0e:00.0,single_ws=1
 
-- ``CN10K Getwork mode``
+- ``CN10K/CN20K Getwork mode``
 
-  CN10K supports three getwork prefetch modes no prefetch[0], prefetch
-  immediately[1] and delayed prefetch on forward progress event[2].
+  CN10K/CN20K supports three getwork prefetch modes no prefetch[0],
+  prefetch immediately[1] and delayed prefetch on forward progress
+  event[2].
   The default getwork mode is 2.
 
   For example::
@@ -103,13 +105,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -198,8 +200,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 023f357d94..9d65594f2f 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -63,6 +63,10 @@ New Features
 
   * Added ethdev driver support for CN20K SoC.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
+
 
 Removed Items
 -------------
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index ff76565af9..b4cfe04d40 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index e0e350570d..ec74a0d6ce 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -325,6 +325,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 03/20] event/cnxk: add CN20K device config
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-03 13:22 ` [PATCH 02/20] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 04/20] event/cnxk: add CN20k event queue config pbhagavatula
                   ` (18 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 04/20] event/cnxk: add CN20k event queue config
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-03 13:22 ` [PATCH 02/20] event/cnxk: add CN20K specific device probe pbhagavatula
  2024-10-03 13:22 ` [PATCH 03/20] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 05/20] event/cnxk: add CN20K event port configuration pbhagavatula
                   ` (17 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index a1aa4d0bfd..8ce75951c6 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1304,7 +1304,7 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
 			      CN10K_SSO_GW_MODE "=<int>"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 4c5b2f7f59..5c02733863 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -626,8 +626,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &single_ws);
 	rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
 			   &dev->gw_mode);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 811b2c5ba0..654ad225a0 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -28,7 +28,7 @@
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
 #define CN10K_SSO_GW_MODE  "gw_mode"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 05/20] event/cnxk: add CN20K event port configuration
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (2 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 04/20] event/cnxk: add CN20k event queue config pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 06/20] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                   ` (16 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  52 ++----
 drivers/event/cnxk/cn20k_eventdev.c         | 174 +++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h         |  25 +++
 drivers/event/cnxk/cnxk_common.h            |  34 ++++
 drivers/event/cnxk/cnxk_eventdev.c          |   2 +-
 drivers/event/cnxk/cnxk_eventdev.h          |   8 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 7 files changed, 255 insertions(+), 46 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8ce75951c6..1963896aa2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -219,12 +197,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -648,13 +626,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -1246,7 +1224,7 @@ cn10k_sso_init(struct rte_eventdev *event_dev)
 		return 0;
 	}
 
-	dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+	dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
 		return rc;
@@ -1303,7 +1281,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_GW_MODE "=<int>"
+			      CNXK_SSO_GW_MODE "=<int>"
 			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..6a80bddb7c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,85 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +138,96 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +238,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
@@ -94,6 +264,7 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
 		return rc;
@@ -140,4 +311,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_GW_MODE "=<int>");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..50a9df77cb
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
+	int64_t *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..5f6aeb37eb
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 5c02733863..f68052d8f4 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,7 +624,7 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_value,
+	rte_kvargs_process(kvlist, CNXK_SSO_GW_MODE, &parse_kvargs_value,
 			   &dev->gw_mode);
 	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
 			   dev);
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 654ad225a0..75680f8fee 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_GW_MODE  "gw_mode"
+#define CNXK_SSO_GW_MODE   "gw_mode"
 #define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
@@ -39,9 +39,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a371e44300..7b5e3e5cf9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 06/20] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (3 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 05/20] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-22  1:46   ` Stephen Hemminger
  2024-10-03 13:22 ` [PATCH 07/20] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                   ` (15 subsequent siblings)
  20 siblings, 1 reply; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton, Anatoly Burakov; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  21 +-
 drivers/event/cnxk/cn20k_worker.c   | 386 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  22 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 429 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6a80bddb7c..f20b7c16b6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -106,6 +107,22 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue = cn20k_sso_hws_enq;
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -261,8 +278,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 	rc = cnxk_sso_init(event_dev);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..16770bcd05
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, rte_memory_order_relaxed)) * 352;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (__atomic_load_n(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = __atomic_fetch_sub(ws->fc_cache_space, req, rte_memory_order_acquire) - req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill, 0,
+					  rte_memory_order_release, rte_memory_order_relaxed);
+		goto retry;
+	}
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq(void *port, const struct rte_event *ev)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_enq(port, ev);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - 352;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..97da0a844b
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq(void *port, const struct rte_event *ev);
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index ec74a0d6ce..5989d7b0d5 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -328,6 +328,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 07/20] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (4 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 06/20] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-22  1:49   ` Stephen Hemminger
  2024-10-03 13:22 ` [PATCH 08/20] event/cnxk: add CN20K event port quiesce pbhagavatula
                   ` (14 subsequent siblings)
  20 siblings, 1 reply; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   8 ++
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 198 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index f20b7c16b6..277366aa82 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -112,12 +112,20 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue = cn20k_sso_hws_enq;
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue = cn20k_sso_hws_deq;
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns) {
+		event_dev->dequeue = cn20k_sso_hws_tmo_deq;
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+	}
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 16770bcd05..447050f5c2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -384,3 +384,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 97da0a844b..d14cfe36e4 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq(void *port, const struct rte_event *ev);
@@ -19,4 +147,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 08/20] event/cnxk: add CN20K event port quiesce
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (5 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 07/20] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 09/20] event/cnxk: add CN20K event port profile switch pbhagavatula
                   ` (13 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 277366aa82..4c146c1b32 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -208,6 +208,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -265,6 +324,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 09/20] event/cnxk: add CN20K event port profile switch
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (6 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 08/20] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 10/20] event/cnxk: add CN20K device start pbhagavatula
                   ` (12 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 4c146c1b32..079c7809f6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -126,6 +126,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 	}
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 447050f5c2..60554b85a0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -385,6 +385,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index d14cfe36e4..f9e6a394f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 10/20] event/cnxk: add CN20K device start
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (7 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 09/20] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 11/20] event/cnxk: add CN20K device stop and close pbhagavatula
                   ` (11 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 1963896aa2..217b7deb92 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -153,83 +153,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -701,24 +624,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -730,9 +635,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -753,8 +657,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 079c7809f6..01be56469f 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -85,6 +85,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -313,6 +368,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -331,6 +405,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 5f6aeb37eb..f2610bd1a8 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -31,4 +40,99 @@ cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 	return wdata;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 11/20] event/cnxk: add CN20K device stop and close
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (8 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 10/20] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 12/20] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                   ` (10 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 01be56469f..1bfad3c293 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -387,6 +387,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -407,6 +426,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 12/20] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (9 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 11/20] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 13/20] event/cnxk: support CN20K Rx adapter pbhagavatula
                   ` (9 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index e4e234dc98..4aacbd29f7 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1377,6 +1377,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1388,5 +1394,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 1bfad3c293..e4fecc3e32 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -406,6 +406,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -425,9 +431,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7b5e3e5cf9..f7995faeca 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 13/20] event/cnxk: support CN20K Rx adapter
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (10 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 12/20] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 14/20] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                   ` (8 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 122 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index e4fecc3e32..a3ef7f8747 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -412,6 +413,118 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue = event_dev->dequeue;
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -431,6 +544,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -509,4 +628,5 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_GW_MODE "=<int>");
+			      CNXK_SSO_GW_MODE "=<int>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 50a9df77cb..ce12628f81 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
 	int64_t *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 14/20] event/cnxk: support CN20K Rx adapter fast path
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (11 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 13/20] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 15/20] event/cnxk: support CN20K Tx adapter pbhagavatula
                   ` (7 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 192 +++++++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 -----
 drivers/event/cnxk/cn20k_worker.h             | 186 ++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15.c       |  20 ++
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_0_15_seg.c   |  20 ++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo.c   |  20 ++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 ++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_seg.c   |  20 ++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_112_127.c    |  20 ++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 ++
 .../event/cnxk/deq/cn20k/deq_112_127_seg.c    |  20 ++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 ++
 .../event/cnxk/deq/cn20k/deq_112_127_tmo.c    |  20 ++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 ++
 .../cnxk/deq/cn20k/deq_112_127_tmo_seg.c      |  17 ++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_16_31.c      |  20 ++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_16_31_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo.c  |  20 ++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 ++
 .../event/cnxk/deq/cn20k/deq_16_31_tmo_seg.c  |  21 ++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_32_47.c      |  20 ++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_32_47_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo.c  |  20 ++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../event/cnxk/deq/cn20k/deq_32_47_tmo_seg.c  |  21 ++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_48_63.c      |  20 ++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_48_63_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo.c  |  20 ++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 ++
 .../event/cnxk/deq/cn20k/deq_48_63_tmo_seg.c  |  21 ++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_64_79.c      |  20 ++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_64_79_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo.c  |  20 ++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 ++
 .../event/cnxk/deq/cn20k/deq_64_79_tmo_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_80_95.c      |  20 ++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_80_95_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo.c  |  20 ++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 ++
 .../event/cnxk/deq/cn20k/deq_80_95_tmo_seg.c  |  20 ++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_96_111.c     |  20 ++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_96_111_seg.c |  20 ++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 ++
 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo.c |  20 ++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 ++
 .../event/cnxk/deq/cn20k/deq_96_111_tmo_seg.c |  20 ++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 ++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  77 +++++++
 drivers/event/cnxk/meson.build                |  75 +++++++
 69 files changed, 1857 insertions(+), 72 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a3ef7f8747..f5be3f3e89 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -163,25 +166,192 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	const event_dequeue_t sso_hws_deq[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_deq_tmo[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_deq_seg[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_##name,
+
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_deq_tmo_seg[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_reas_deq[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_reas_deq_tmo[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_reas_deq_seg[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_##name,
+
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_reas_deq_tmo_seg[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue, sso_hws_reas_deq_seg);
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq) {
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue,
+						       sso_hws_reas_deq_tmo_seg);
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+			}
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue, sso_hws_deq_seg);
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq) {
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue,
+						       sso_hws_deq_tmo_seg);
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+			}
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue, sso_hws_reas_deq);
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq) {
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue,
+						       sso_hws_reas_deq_tmo);
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+			}
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue, sso_hws_deq);
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq) {
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue, sso_hws_deq_tmo);
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+			}
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue = cn20k_sso_hws_deq_all_offload;
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F) {
+		event_dev->dequeue = cn20k_sso_hws_deq_all_offload_tst;
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	}
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue = cn20k_sso_hws_enq;
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue = cn20k_sso_hws_deq;
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns) {
-		event_dev->dequeue = cn20k_sso_hws_tmo_deq;
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-	}
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
@@ -284,7 +454,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -310,7 +481,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 60554b85a0..6953b12e8f 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -395,57 +395,3 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 
 	return 0;
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index f9e6a394f2..5b33dcd97d 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -148,11 +196,133 @@ uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_##name(void *port, struct rte_event *ev,              \
+						    uint64_t timeout_ticks);                       \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_##name(void *port, struct rte_event *ev,          \
+							uint64_t timeout_ticks);                   \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_##name(void *port, struct rte_event *ev,           \
+						       uint64_t timeout_ticks);                    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_##name(void *port, struct rte_event *ev,       \
+							   uint64_t timeout_ticks);                \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_##name(void *port, struct rte_event *ev,          \
+							uint64_t timeout_ticks);                   \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_##name(void *port, struct rte_event *ev,      \
+							    uint64_t timeout_ticks);               \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_##name(void *port, struct rte_event *ev,       \
+							   uint64_t timeout_ticks);                \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_##name(void *port, struct rte_event *ev,   \
+							       uint64_t timeout_ticks);            \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_##name(void *port, struct rte_event *ev,         \
+							 uint64_t timeout_ticks);                  \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_##name(void *port, struct rte_event *ev,     \
+							     uint64_t timeout_ticks);              \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_##name(void *port, struct rte_event *ev,      \
+							    uint64_t timeout_ticks);               \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_##name(void *port, struct rte_event *ev,  \
+								uint64_t timeout_ticks);           \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_##name(void *port, struct rte_event *ev,     \
+							     uint64_t timeout_ticks);              \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_##name(void *port, struct rte_event *ev, \
+								 uint64_t timeout_ticks);          \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_##name(void *port, struct rte_event *ev,  \
+								uint64_t timeout_ticks);           \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_##name(                               \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);                         \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	uint16_t __rte_hot fn(void *port, struct rte_event *ev, uint64_t timeout_ticks)            \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	uint16_t __rte_hot fn(void *port, struct rte_event *ev, uint64_t timeout_ticks)            \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_all_offload(void *port, struct rte_event *ev,
+						 uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_all_offload_tst(void *port, struct rte_event *ev,
+						     uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15.c b/drivers/event/cnxk/deq/cn20k/deq_0_15.c
new file mode 100644
index 0000000000..3da59d7956
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg.c
new file mode 100644
index 0000000000..7eada0004c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo.c
new file mode 100644
index 0000000000..a3fc8ab7cc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg.c
new file mode 100644
index 0000000000..2cb27b2adf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127.c b/drivers/event/cnxk/deq/cn20k/deq_112_127.c
new file mode 100644
index 0000000000..8729b68ce1
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg.c
new file mode 100644
index 0000000000..cd93bb0347
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo.c
new file mode 100644
index 0000000000..e5b86bec9b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg.c
new file mode 100644
index 0000000000..9b3e581a1a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31.c b/drivers/event/cnxk/deq/cn20k/deq_16_31.c
new file mode 100644
index 0000000000..e5cee1e782
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg.c
new file mode 100644
index 0000000000..9dcf21f9b5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo.c
new file mode 100644
index 0000000000..5fdda01ff1
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg.c
new file mode 100644
index 0000000000..3c43b23e6f
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name,                 \
+			flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47.c b/drivers/event/cnxk/deq/cn20k/deq_32_47.c
new file mode 100644
index 0000000000..0fa7812fa2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg.c
new file mode 100644
index 0000000000..23a8808dd7
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo.c
new file mode 100644
index 0000000000..9f7dec9252
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg.c
new file mode 100644
index 0000000000..4457e6e408
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name,                 \
+			flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63.c b/drivers/event/cnxk/deq/cn20k/deq_48_63.c
new file mode 100644
index 0000000000..d4eae4cdd0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg.c
new file mode 100644
index 0000000000..744848b27a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo.c
new file mode 100644
index 0000000000..04efb325a1
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg.c
new file mode 100644
index 0000000000..1039ebcd6b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name,                 \
+			flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79.c b/drivers/event/cnxk/deq/cn20k/deq_64_79.c
new file mode 100644
index 0000000000..f8a76e0ab7
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg.c
new file mode 100644
index 0000000000..ad37823c73
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo.c
new file mode 100644
index 0000000000..f8f9235af8
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg.c
new file mode 100644
index 0000000000..054bbc4343
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95.c b/drivers/event/cnxk/deq/cn20k/deq_80_95.c
new file mode 100644
index 0000000000..26ae2d9c74
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg.c
new file mode 100644
index 0000000000..ce163d4a6b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo.c
new file mode 100644
index 0000000000..cdfe507303
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg.c
new file mode 100644
index 0000000000..d3746232f8
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111.c b/drivers/event/cnxk/deq/cn20k/deq_96_111.c
new file mode 100644
index 0000000000..f6950a2564
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                               \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg.c
new file mode 100644
index 0000000000..67407bb892
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                       \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo.c
new file mode 100644
index 0000000000..436ad7f5be
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                       \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg.c
new file mode 100644
index 0000000000..0b5c11d5fa
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)               \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..99470d00c2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_all_offload(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_all_offload_tst(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_deq_all_offload(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_deq_all_offload_tst(port, ev, timeout_ticks);
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 5989d7b0d5..6e29e62cbc 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -330,6 +330,81 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15.c',
+        'deq/cn20k/deq_16_31.c',
+        'deq/cn20k/deq_32_47.c',
+        'deq/cn20k/deq_48_63.c',
+        'deq/cn20k/deq_64_79.c',
+        'deq/cn20k/deq_80_95.c',
+        'deq/cn20k/deq_96_111.c',
+        'deq/cn20k/deq_112_127.c',
+        'deq/cn20k/deq_0_15_seg.c',
+        'deq/cn20k/deq_16_31_seg.c',
+        'deq/cn20k/deq_32_47_seg.c',
+        'deq/cn20k/deq_48_63_seg.c',
+        'deq/cn20k/deq_64_79_seg.c',
+        'deq/cn20k/deq_80_95_seg.c',
+        'deq/cn20k/deq_96_111_seg.c',
+        'deq/cn20k/deq_112_127_seg.c',
+        'deq/cn20k/deq_0_15_tmo.c',
+        'deq/cn20k/deq_16_31_tmo.c',
+        'deq/cn20k/deq_32_47_tmo.c',
+        'deq/cn20k/deq_48_63_tmo.c',
+        'deq/cn20k/deq_64_79_tmo.c',
+        'deq/cn20k/deq_80_95_tmo.c',
+        'deq/cn20k/deq_96_111_tmo.c',
+        'deq/cn20k/deq_112_127_tmo.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg.c',
+        'deq/cn20k/deq_16_31_tmo_seg.c',
+        'deq/cn20k/deq_32_47_tmo_seg.c',
+        'deq/cn20k/deq_48_63_tmo_seg.c',
+        'deq/cn20k/deq_64_79_tmo_seg.c',
+        'deq/cn20k/deq_80_95_tmo_seg.c',
+        'deq/cn20k/deq_96_111_tmo_seg.c',
+        'deq/cn20k/deq_112_127_tmo_seg.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 15/20] event/cnxk: support CN20K Tx adapter
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (12 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 14/20] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 16/20] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                   ` (6 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index f5be3f3e89..06cf201146 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -166,6 +167,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -697,6 +727,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -722,6 +841,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index ce12628f81..8623f790a2 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -24,6 +24,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 16/20] event/cnxk: support CN20K Tx adapter fast path
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (13 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 15/20] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 17/20] common/cnxk: add SSO event aggregator pbhagavatula
                   ` (5 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 ++++
 drivers/event/cnxk/cn20k_tx_worker.h         | 174 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 559 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 06cf201146..e9c2d3786f 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -300,6 +303,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue, sso_hws_reas_deq_seg);
@@ -347,6 +363,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -364,6 +386,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue = cn20k_sso_hws_deq_all_offload_tst;
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
 	}
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..02b2be4de2 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,178 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			__atomic_load_n(txq->fc_mem, rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)__atomic_load_n(txq->fc_mem, rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 6e29e62cbc..c5c7d38173 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -400,9 +400,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 17/20] common/cnxk: add SSO event aggregator
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (14 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 16/20] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 18/20] event/cnxk: add Rx/Tx event vector support pbhagavatula
                   ` (4 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index b4cfe04d40..64c9f10b95 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.\n");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d\n", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.\n");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d\n", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 315596217c..d0e9436e9b 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -499,6 +499,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 18/20] event/cnxk: add Rx/Tx event vector support
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (15 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 17/20] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 19/20] common/cnxk: update timer base code pbhagavatula
                   ` (3 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index e9c2d3786f..bb1c2ca18f 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -73,6 +73,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -657,7 +658,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -704,6 +706,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -720,7 +872,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -753,7 +905,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -767,7 +941,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -870,6 +1045,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 02b2be4de2..811d2c6eeb 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -137,10 +137,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -148,6 +196,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5b33dcd97d..fcd2944886 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 75680f8fee..e8607ab713 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -267,6 +267,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 19/20] common/cnxk: update timer base code
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (16 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 18/20] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-03 13:22 ` [PATCH 20/20] event/cnxk: add CN20K timer adapter pbhagavatula
                   ` (2 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 095afbb9e6..db1c129806 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info\n");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index d0e9436e9b..55e645e383 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -527,6 +527,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..9bd36158d8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH 20/20] event/cnxk: add CN20K timer adapter
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (17 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 19/20] common/cnxk: update timer base code pbhagavatula
@ 2024-10-03 13:22 ` pbhagavatula
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-22  1:52 ` [PATCH 01/20] common/cnxk: implement SSO HW info Stephen Hemminger
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-03 13:22 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  89 ++++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 197 +++++++++++++++++++++++++++
 7 files changed, 349 insertions(+), 16 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index db1c129806..fc93b5e247 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info\n");
+		plt_err("Failed to get TIM HW info\n");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index bb1c2ca18f..682647c389 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -1020,6 +1020,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -1054,6 +1061,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1133,4 +1142,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
 			      CNXK_SSO_GW_MODE "=<int>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index fcd2944886..0f168404ba 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index f8753b29ad..a158a4ad58 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 9bd36158d8..a36084d714 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5dcf6085dc..16b6b4e33f 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -72,7 +63,25 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}
 
 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		__atomic_fetch_add(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);
+
+	return index;
+}
+
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
 
 	return index;
 }
@@ -126,12 +135,34 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}
 
 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers, rte_memory_order_relaxed);
 
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -153,7 +184,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;
 
 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -174,6 +205,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = &tim[i]->impl_opaque[1];
+		if (!__atomic_compare_exchange_n(status, (uint64_t *)&tim[i], 0, 0,
+						 rte_memory_order_release,
+						 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..b9537bdf1c 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -574,6 +581,196 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint64_t *status;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = &tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = __atomic_load_n(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint64_t *status;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = &tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = __atomic_load_n(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((__atomic_load_n(&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+								      ->impl_opaque[0],
+							     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = &tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = __atomic_load_n(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((__atomic_load_n(&tim[cnt + j]->impl_opaque[0],
+							     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* Re: [PATCH 02/20] event/cnxk: add CN20K specific device probe
  2024-10-03 13:22 ` [PATCH 02/20] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-15 16:17   ` Stephen Hemminger
  0 siblings, 0 replies; 181+ messages in thread
From: Stephen Hemminger @ 2024-10-15 16:17 UTC (permalink / raw)
  To: pbhagavatula
  Cc: jerinj, Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Anatoly Burakov, dev

On Thu, 3 Oct 2024 18:52:19 +0530
<pbhagavatula@marvell.com> wrote:

> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Add platform specific event device probe and remove, also add
> event device info get function.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  doc/guides/eventdevs/cnxk.rst          | 30 +++++----
>  doc/guides/rel_notes/release_24_11.rst |  4 ++
>  drivers/common/cnxk/roc_sso.c          | 10 ++-
>  drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
>  drivers/event/cnxk/meson.build         |  8 ++-
>  5 files changed, 128 insertions(+), 17 deletions(-)
>  create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

Patch series needs to be rebased, there is now a conflict in the eventdevs doc
as well as the release note but maintainers sometimes fix that during merge.

$ cat doc/guides/eventdevs/cnxk.rst.rej 
--- doc/guides/eventdevs/cnxk.rst
+++ doc/guides/eventdevs/cnxk.rst
@@ -79,10 +80,11 @@ Runtime Config Options
 
     -a 0002:0e:00.0,single_ws=1
 
-- ``CN10K Getwork mode``
+- ``CN10K/CN20K Getwork mode``
 
-  CN10K supports three getwork prefetch modes no prefetch[0], prefetch
-  immediately[1] and delayed prefetch on forward progress event[2].
+  CN10K/CN20K supports three getwork prefetch modes no prefetch[0],
+  prefetch immediately[1] and delayed prefetch on forward progress
+  event[2].
   The default getwork mode is 2.
 
   For example::

^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 01/21] common/cnxk: implement SSO HW info
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (18 preceding siblings ...)
  2024-10-03 13:22 ` [PATCH 20/20] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-21 20:57 ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 02/21] event/cnxk: add CN20K specific device probe pbhagavatula
                     ` (20 more replies)
  2024-10-22  1:52 ` [PATCH 01/20] common/cnxk: implement SSO HW info Stephen Hemminger
  20 siblings, 21 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Ankur Dwivedi, Anoob Joseph,
	Tejasree Kondoj, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: series-33580 (event/dsw: remove single event enqueue and dequeue)

 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  7 +--
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 35 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };

+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}

-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;

 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }

+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }

+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;

-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }

@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}

-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}

 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}

+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"

 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256

 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);

+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 877333b80c..de748ac409 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -516,6 +516,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@

 #include <ethdev_driver.h>

-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>

-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 531c489172..e04377d921 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;

 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..cfc0f94eca 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index a0e85face1..d23baafa6d 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -80,8 +82,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
-	       ROC_SSO_XAE_PER_XAQ;
+	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) * ws->xae_waes;
 }

 static inline void
@@ -398,7 +399,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;

 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */

-#include "roc_npa.h"
+#include "roc_api.h"

 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;

 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..a24e5127a7 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@

 #include "cnxk_eventdev_dp.h"

-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"

 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a371e44300 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>

+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"

diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..79036f7ed8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@

 #include <math.h>

-#include "roc_npa.h"
+#include "roc_api.h"

 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..5dcf6085dc 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"

diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>

-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 struct pwords {
 	uint64_t u[5];
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 02/21] event/cnxk: add CN20K specific device probe
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 03/21] event/cnxk: add CN20K device config pbhagavatula
                     ` (19 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  3 +
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 123 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index fa4822d928..c906262012 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -247,6 +247,9 @@ New Features
   Added ability for node to advertise and update multiple xstat counters,
   that can be retrieved using ``rte_graph_cluster_stats_get``.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
 
 Removed Items
 -------------
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 2c1060ad87..7de78eeba0 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -325,6 +325,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 03/21] event/cnxk: add CN20K device config
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 02/21] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 04/21] event/cnxk: add CN20k event queue configuration pbhagavatula
                     ` (18 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 04/21] event/cnxk: add CN20k event queue configuration
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 02/21] event/cnxk: add CN20K specific device probe pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 03/21] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 05/21] event/cnxk: add CN20K event port configuration pbhagavatula
                     ` (17 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index e04377d921..d073529265 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1319,7 +1319,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index a24e5127a7..170c1134a7 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 05/21] event/cnxk: add CN20K event port configuration
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (2 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 04/21] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 06/21] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                     ` (16 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  61 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 172 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  25 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 272 insertions(+), 53 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index d073529265..f9fcbf70d8 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -219,12 +197,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -571,18 +549,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -665,13 +632,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..1d5115d47b 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,85 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +138,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +240,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..50a9df77cb
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
+	int64_t *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 170c1134a7..3605050709 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a371e44300..7b5e3e5cf9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 06/21] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (3 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 05/21] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 07/21] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                     ` (15 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton, Anatoly Burakov; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 380 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 421 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 1d5115d47b..d2c0fcceee 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -106,6 +107,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -263,8 +279,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..19db29e7a9
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, rte_memory_order_relaxed)) * 352;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (__atomic_load_n(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = __atomic_fetch_sub(ws->fc_cache_space, req, rte_memory_order_acquire) - req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill, 0,
+					  rte_memory_order_release, rte_memory_order_relaxed);
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - 352;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 7de78eeba0..ee27d38115 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -328,6 +328,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 07/21] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (4 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 06/21] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 08/21] event/cnxk: add CN20K event port quiesce pbhagavatula
                     ` (14 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d2c0fcceee..6c810ed77d 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -112,11 +112,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 19db29e7a9..075461c3ff 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -378,3 +378,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 08/21] event/cnxk: add CN20K event port quiesce
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (5 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 07/21] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 09/21] event/cnxk: add CN20K event port profile switch pbhagavatula
                     ` (13 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6c810ed77d..c435981fa0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -206,6 +206,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -263,6 +322,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 09/21] event/cnxk: add CN20K event port profile switch
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (6 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 08/21] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 10/21] event/cnxk: add CN20K event port preschedule pbhagavatula
                     ` (12 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c435981fa0..13d293f1d3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -122,6 +122,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 075461c3ff..224ab64b1e 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -379,6 +379,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 10/21] event/cnxk: add CN20K event port preschedule
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (7 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 09/21] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 11/21] event/cnxk: add CN20K device start pbhagavatula
                     ` (11 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 13d293f1d3..6289480c42 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -123,6 +123,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 224ab64b1e..b0196e9885 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -390,6 +390,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 11/21] event/cnxk: add CN20K device start
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (8 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 10/21] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 12/21] event/cnxk: add CN20K device stop and close pbhagavatula
                     ` (10 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index f9fcbf70d8..88bf5ca6d7 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -153,83 +153,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -707,24 +630,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -736,9 +641,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -759,8 +663,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6289480c42..12e0513d95 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -85,6 +85,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -313,6 +368,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -331,6 +405,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 12/21] event/cnxk: add CN20K device stop and close
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (9 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 11/21] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 13/21] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                     ` (9 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 12e0513d95..84cb6b8d79 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -387,6 +387,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -407,6 +426,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 13/21] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (10 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 12/21] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 14/21] event/cnxk: support CN20K Rx adapter pbhagavatula
                     ` (8 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index a9258d2abc..f4da07c596 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 84cb6b8d79..dad2c567d0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -406,6 +406,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -425,9 +431,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7b5e3e5cf9..f7995faeca 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 14/21] event/cnxk: support CN20K Rx adapter
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (11 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 13/21] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 15/21] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                     ` (7 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index dad2c567d0..ea2b218373 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -412,6 +413,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -431,6 +543,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -507,4 +625,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 50a9df77cb..ce12628f81 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
 	int64_t *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 15/21] event/cnxk: support CN20K Rx adapter fast path
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (12 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 14/21] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 16/21] event/cnxk: support CN20K Tx adapter pbhagavatula
                     ` (6 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index ea2b218373..90c34b1d62 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -163,21 +166,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -284,7 +390,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -310,7 +417,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index b0196e9885..b67f921b67 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -419,57 +419,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index ee27d38115..2f7fe0dc47 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -330,6 +330,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 16/21] event/cnxk: support CN20K Tx adapter
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (13 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 15/21] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 17/21] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                     ` (5 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 90c34b1d62..4fd9a67bf9 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -166,6 +167,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -632,6 +662,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -657,6 +776,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index ce12628f81..8623f790a2 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -24,6 +24,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 17/21] event/cnxk: support CN20K Tx adapter fast path
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (14 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 16/21] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 18/21] common/cnxk: add SSO event aggregator pbhagavatula
                     ` (4 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 ++++
 drivers/event/cnxk/cn20k_tx_worker.h         | 174 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 559 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 4fd9a67bf9..2269165ea4 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -251,6 +254,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -283,6 +299,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -297,6 +319,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..02b2be4de2 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,178 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			__atomic_load_n(txq->fc_mem, rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)__atomic_load_n(txq->fc_mem, rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 2f7fe0dc47..25ac26dc30 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -368,9 +368,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 18/21] common/cnxk: add SSO event aggregator
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (15 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 17/21] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 19/21] event/cnxk: add Rx/Tx event vector support pbhagavatula
                     ` (3 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index de748ac409..14ee6031e2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -500,6 +500,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 19/21] event/cnxk: add Rx/Tx event vector support
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (16 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 18/21] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 20/21] common/cnxk: update timer base code pbhagavatula
                     ` (2 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 2269165ea4..e643a14522 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -73,6 +73,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -593,7 +594,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -639,6 +641,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -655,7 +807,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -688,7 +840,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -702,7 +876,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -805,6 +980,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 02b2be4de2..811d2c6eeb 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -137,10 +137,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -148,6 +196,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 3605050709..ec7de6dc6d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 20/21] common/cnxk: update timer base code
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (17 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 19/21] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-21 20:57   ` [PATCH v2 21/21] event/cnxk: add CN20K timer adapter pbhagavatula
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 14ee6031e2..e7381ae8b2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -528,6 +528,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..9bd36158d8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v2 21/21] event/cnxk: add CN20K timer adapter
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (18 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 20/21] common/cnxk: update timer base code pbhagavatula
@ 2024-10-21 20:57   ` pbhagavatula
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-21 20:57 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  89 ++++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 197 +++++++++++++++++++++++++++
 7 files changed, 349 insertions(+), 16 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index e643a14522..e3ff39801c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -955,6 +955,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -989,6 +996,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1066,4 +1075,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 79036f7ed8..7037c3943f 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 9bd36158d8..a36084d714 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5dcf6085dc..16b6b4e33f 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -72,7 +63,25 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}
 
 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		__atomic_fetch_add(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);
+
+	return index;
+}
+
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
 
 	return index;
 }
@@ -126,12 +135,34 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}
 
 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers, rte_memory_order_relaxed);
 
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -153,7 +184,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;
 
 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -174,6 +205,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = &tim[i]->impl_opaque[1];
+		if (!__atomic_compare_exchange_n(status, (uint64_t *)&tim[i], 0, 0,
+						 rte_memory_order_release,
+						 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..b9537bdf1c 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -574,6 +581,196 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint64_t *status;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = &tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = __atomic_load_n(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint64_t *status;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = &tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = __atomic_load_n(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((__atomic_load_n(&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+								      ->impl_opaque[0],
+							     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = &tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = __atomic_load_n(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((__atomic_load_n(&tim[cnt + j]->impl_opaque[0],
+							     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* Re: [PATCH 06/20] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-03 13:22 ` [PATCH 06/20] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-22  1:46   ` Stephen Hemminger
  0 siblings, 0 replies; 181+ messages in thread
From: Stephen Hemminger @ 2024-10-22  1:46 UTC (permalink / raw)
  To: pbhagavatula; +Cc: jerinj, Shijith Thotton, Anatoly Burakov, dev

On Thu, 3 Oct 2024 18:52:23 +0530
<pbhagavatula@marvell.com> wrote:

Fix this in next version.

Warning in drivers/event/cnxk/cn20k_worker.c:
Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx



> +
> +static inline int32_t
> +sso_read_xaq_space(struct cn20k_sso_hws *ws)
> +{
> +	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, rte_memory_order_relaxed)) * 352;
> +}

^ permalink raw reply	[flat|nested] 181+ messages in thread

* Re: [PATCH 07/20] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-03 13:22 ` [PATCH 07/20] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-22  1:49   ` Stephen Hemminger
  2024-10-22  8:54     ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 181+ messages in thread
From: Stephen Hemminger @ 2024-10-22  1:49 UTC (permalink / raw)
  To: pbhagavatula; +Cc: jerinj, Shijith Thotton, dev

On Thu, 3 Oct 2024 18:52:24 +0530
<pbhagavatula@marvell.com> wrote:

> +static __rte_always_inline uint16_t
> +cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
> +{
> +	union {
> +		__uint128_t get_work;
> +		uint64_t u64[2];
> +	} gw;
> +
> +	gw.get_work = ws->gw_wdata;
> +#if defined(RTE_ARCH_ARM64)
> +#if defined(__clang__)
> +	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
> +	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
> +#if defined(RTE_ARM_USE_WFE)
> +	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
> +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> +		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
> +		     "		tbz %[x0], %[pend_gw], done%=	\n"
> +		     "		sevl					\n"
> +		     "rty%=:	wfe					\n"
> +		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
> +		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
> +		     "done%=:						\n"
> +		     "		dmb ld					\n"
> +		     : [x0] "+r" (x0), [x1] "+r" (x1)
> +		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
> +		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
> +		     : "memory");
> +#else
> +	asm volatile(".arch armv8-a+lse\n"
> +		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
> +		     : [x0] "+r" (x0), [x1] "+r" (x1)
> +		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
> +		     : "memory");
> +#endif
> +	gw.u64[0] = x0;
> +	gw.u64[1] = x1;
> +#else
> +#if defined(RTE_ARM_USE_WFE)
> +	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
> +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> +		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
> +		     "		tbz %[wdata], %[pend_gw], done%=	\n"
> +		     "		sevl					\n"
> +		     "rty%=:	wfe					\n"
> +		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
> +		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
> +		     "done%=:						\n"
> +		     "		dmb ld					\n"
> +		     : [wdata] "=&r"(gw.get_work)
> +		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
> +		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
> +		     : "memory");
> +#else
> +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> +		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
> +		     : [wdata] "+r"(gw.get_work)
> +		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
> +		     : "memory");
> +#endif
> +#endif

This generates lots of warnings like:

WARNING:QUOTED_WHITESPACE_BEFORE_NEWLINE: unnecessary whitespace before a quoted newline
#131: FILE: drivers/event/cnxk/cn20k_worker.h:39:
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"

But the bigger problem is burying assembly in a driver is a bad idea.
The use of ARM primitives should be done in EAL, not down in the PMD.



^ permalink raw reply	[flat|nested] 181+ messages in thread

* Re: [PATCH 01/20] common/cnxk: implement SSO HW info
  2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
                   ` (19 preceding siblings ...)
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-22  1:52 ` Stephen Hemminger
  2024-10-22  8:53   ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
  20 siblings, 1 reply; 181+ messages in thread
From: Stephen Hemminger @ 2024-10-22  1:52 UTC (permalink / raw)
  To: pbhagavatula
  Cc: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Ankur Dwivedi, Anoob Joseph,
	Tejasree Kondoj, Shijith Thotton, dev

On Thu, 3 Oct 2024 18:52:18 +0530
<pbhagavatula@marvell.com> wrote:

> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Add SSO HW info mbox to get hardware capabilities, and reuse
> them instead of depending on hardcoded values.
> Remove redundant includes.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---

You need to fix direct use of __atomic in this series.

^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 01/22] event/cnxk: use stdatomic API
  2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
                     ` (19 preceding siblings ...)
  2024-10-21 20:57   ` [PATCH v2 21/21] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-22  8:46   ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 02/22] common/cnxk: implement SSO HW info pbhagavatula
                       ` (21 more replies)
  20 siblings, 22 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v2 Changes:
- Rebase and remove single dequeue and enqueue functions.
v3 Changes:
- Remove __atomic builtins.

 drivers/event/cnxk/cn10k_eventdev.c         |  6 +--
 drivers/event/cnxk/cn10k_eventdev.h         |  4 +-
 drivers/event/cnxk/cn10k_tx_worker.h        |  7 ++-
 drivers/event/cnxk/cn10k_worker.c           | 15 +++---
 drivers/event/cnxk/cn10k_worker.h           |  2 +-
 drivers/event/cnxk/cn9k_eventdev.c          |  8 +--
 drivers/event/cnxk/cn9k_worker.h            | 18 ++++---
 drivers/event/cnxk/cnxk_eventdev.h          |  4 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c | 60 ++++++++++-----------
 drivers/event/cnxk/cnxk_tim_evdev.c         |  4 +-
 drivers/event/cnxk/cnxk_tim_evdev.h         | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.c        | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.h        | 57 ++++++++++----------
 drivers/event/cnxk/cnxk_worker.h            |  3 +-
 drivers/net/cnxk/cn9k_ethdev.h              |  2 +-
 15 files changed, 108 insertions(+), 102 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 531c489172..1fe499308f 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -94,9 +94,9 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	uint64_t val;

 	ws->grp_base = grp_base;
-	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 	ws->xaq_lmt = dev->xaq_lmt;
-	ws->fc_cache_space = dev->fc_cache_space;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;

 	/* Set get_work timeout for HWS */
@@ -835,7 +835,7 @@ cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 	for (i = 0; i < dev->nb_event_ports; i++) {
 		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
 		ws->xaq_lmt = dev->xaq_lmt;
-		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 		ws->tstamp = dev->tstamp;
 		if (lookup_mem)
 			ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..b8395aa314 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -19,8 +19,8 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	uint64_t meta_aura;
 	/* Add Work Fastpath data */
-	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
-	int64_t *fc_cache_space;
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 0695ea23e1..19cb2e22e5 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -51,7 +51,9 @@ cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -60,7 +62,8 @@ static __rte_always_inline int32_t
 cn10k_sso_sq_depth(const struct cn10k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index a0e85face1..dbacb6de95 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -16,7 +16,7 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -80,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
 	       ROC_SSO_XAE_PER_XAQ;
 }

@@ -90,19 +90,20 @@ sso_lmt_aw_wait_fc(struct cn10k_sso_hws *ws, int64_t req)
 	int64_t cached, refill;

 retry:
-	while (__atomic_load_n(ws->fc_cache_space, __ATOMIC_RELAXED) < 0)
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
 		;

-	cached = __atomic_fetch_sub(ws->fc_cache_space, req, __ATOMIC_ACQUIRE) - req;
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
 	/* Check if there is enough space, else update and retry. */
 	if (cached < 0) {
 		/* Check if we have space else retry. */
 		do {
 			refill = sso_read_xaq_space(ws);
 		} while (refill <= 0);
-		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill,
-					  0, __ATOMIC_RELEASE,
-					  __ATOMIC_RELAXED);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
 		goto retry;
 	}
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 4785cc6575..8e46ec36cd 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -311,7 +311,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		roc_load_pair(gw.u64[0], gw.u64[1],
 			      ws->base + SSOW_LF_GWS_WQE0);
 	} while (gw.u64[0] & BIT_ULL(63));
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 #endif
 	ws->gw_rdata = gw.u64[0];
 	if (gw.u64[1])
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 377e910837..7684ae5701 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -74,7 +74,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	if (dev->dual_ws) {
 		dws = hws;
 		dws->grp_base = grp_base;
-		dws->fc_mem = (uint64_t *)dev->fc_iova;
+		dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		dws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, dws->base[0] + SSOW_LF_GWS_NW_TIM);
@@ -82,7 +82,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	} else {
 		ws = hws;
 		ws->grp_base = grp_base;
-		ws->fc_mem = (uint64_t *)dev->fc_iova;
+		ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		ws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
@@ -896,14 +896,14 @@ cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 			struct cn9k_sso_hws_dual *dws =
 				event_dev->data->ports[i];
 			dws->xaq_lmt = dev->xaq_lmt;
-			dws->fc_mem = (uint64_t *)dev->fc_iova;
+			dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			dws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				dws->lookup_mem = lookup_mem;
 		} else {
 			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
 			ws->xaq_lmt = dev->xaq_lmt;
-			ws->fc_mem = (uint64_t *)dev->fc_iova;
+			ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			ws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c92fa72f11..286b2b722e 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -28,7 +28,7 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -71,7 +71,7 @@ cn9k_sso_hws_new_event_wait(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (ws->xaq_lmt <= __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED))
+	while (ws->xaq_lmt <= rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -93,7 +93,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(ws->base);
 		cn9k_sso_hws_new_event_wait(ws, ev);
 	}
@@ -110,7 +110,7 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (dws->xaq_lmt <= *dws->fc_mem)
 		return 0;

@@ -128,7 +128,7 @@ cn9k_sso_hws_dual_new_event_wait(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (dws->xaq_lmt <= __atomic_load_n(dws->fc_mem, __ATOMIC_RELAXED))
+	while (dws->xaq_lmt <= rte_atomic_load_explicit(dws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -151,7 +151,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(base);
 		cn9k_sso_hws_dual_new_event_wait(dws, ev);
 	}
@@ -599,7 +599,9 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -768,7 +770,7 @@ static __rte_always_inline int32_t
 cn9k_sso_sq_depth(const struct cn9k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit(txq->fc_mem, rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..982bbb6a9b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,7 +136,7 @@ struct __rte_cache_aligned cn9k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
@@ -154,7 +154,7 @@ struct __rte_cache_aligned cn9k_sso_hws_dual {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a4615c1356 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -63,7 +63,7 @@ seqn_list_update(int val)
 		return -1;

 	seqn_list[seqn_list_index++] = val;
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	return 0;
 }

@@ -82,7 +82,7 @@ seqn_list_check(int limit)
 }

 struct test_core_param {
-	uint32_t *total_events;
+	uint32_t __rte_atomic *total_events;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port;
 	uint8_t sched_type;
@@ -540,13 +540,13 @@ static int
 worker_multi_port_fn(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;
 	int ret;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;
@@ -554,30 +554,30 @@ worker_multi_port_fn(void *arg)
 		ret = validate_event(&ev);
 		RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event");
 		rte_pktmbuf_free(ev.mbuf);
-		__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 	}

 	return 0;
 }

 static inline int
-wait_workers_to_join(const uint32_t *count)
+wait_workers_to_join(const uint32_t __rte_atomic *count)
 {
 	uint64_t cycles, print_cycles;

 	cycles = rte_get_timer_cycles();
 	print_cycles = cycles;
-	while (__atomic_load_n(count, __ATOMIC_RELAXED)) {
+	while (rte_atomic_load_explicit(count, rte_memory_order_relaxed)) {
 		uint64_t new_cycles = rte_get_timer_cycles();

 		if (new_cycles - print_cycles > rte_get_timer_hz()) {
 			plt_info("Events %d",
-				 __atomic_load_n(count, __ATOMIC_RELAXED));
+				 rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			print_cycles = new_cycles;
 		}
 		if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) {
 			plt_err("No schedules for seconds, deadlock (%d)",
-				__atomic_load_n(count, __ATOMIC_RELAXED));
+				rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			rte_event_dev_dump(evdev, stdout);
 			cycles = new_cycles;
 			return -1;
@@ -593,7 +593,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 			int (*worker_thread)(void *), uint32_t total_events,
 			uint8_t nb_workers, uint8_t sched_type)
 {
-	uint32_t atomic_total_events;
+	uint32_t __rte_atomic atomic_total_events;
 	struct test_core_param *param;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port = 0;
@@ -603,7 +603,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 	if (!nb_workers)
 		return 0;

-	__atomic_store_n(&atomic_total_events, total_events, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&atomic_total_events, total_events, rte_memory_order_relaxed);
 	seqn_list_init();

 	param = malloc(sizeof(struct test_core_param) * nb_workers);
@@ -640,7 +640,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		param[port].sched_type = sched_type;
 		param[port].port = port;
 		param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
 		if (w_lcore == RTE_MAX_LCORE) {
 			plt_err("Failed to get next available lcore");
@@ -651,7 +651,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
 	}

-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	ret = wait_workers_to_join(&atomic_total_events);
 	free(param);

@@ -890,13 +890,13 @@ worker_flow_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -916,8 +916,8 @@ worker_flow_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1046,13 +1046,13 @@ worker_group_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -1072,8 +1072,8 @@ worker_group_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1205,19 +1205,19 @@ static int
 worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.sub_event_type == MAX_STAGES) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.sub_event_type++;
@@ -1284,16 +1284,16 @@ worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
@@ -1329,16 +1329,16 @@ worker_mixed_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* Last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..74a6da5070 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -358,7 +358,7 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
 		tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;

 	stats->evtim_exp_count =
-		__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
+		rte_atomic_load_explicit(&tim_ring->arm_cnt, rte_memory_order_relaxed);
 	stats->ev_enq_count = stats->evtim_exp_count;
 	stats->adapter_tick_count =
 		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div);
@@ -370,7 +370,7 @@ cnxk_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
 {
 	struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;

-	__atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tim_ring->arm_cnt, 0, rte_memory_order_relaxed);
 	return 0;
 }

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..f4c61dfb44 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -108,15 +108,15 @@ struct cnxk_tim_evdev {
 struct cnxk_tim_bkt {
 	uint64_t first_chunk;
 	union {
-		uint64_t w1;
+		uint64_t __rte_atomic w1;
 		struct {
-			uint32_t nb_entry;
+			uint32_t __rte_atomic nb_entry;
 			uint8_t sbt : 1;
 			uint8_t hbt : 1;
 			uint8_t bsk : 1;
 			uint8_t rsvd : 5;
-			uint8_t lock;
-			int16_t chunk_remainder;
+			uint8_t __rte_atomic lock;
+			int16_t __rte_atomic chunk_remainder;
 		};
 	};
 	uint64_t current_chunk;
@@ -134,7 +134,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	struct rte_reciprocal_u64 fast_div;
 	struct rte_reciprocal_u64 fast_bkt;
 	uint64_t tck_int;
-	uint64_t arm_cnt;
+	uint64_t __rte_atomic arm_cnt;
 	uintptr_t base;
 	uint8_t prod_type_sp;
 	uint8_t enable_stats;
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..db31f91818 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -70,7 +70,7 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);

 	return index;
 }
@@ -124,8 +124,8 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, set_timers,
+					      rte_memory_order_relaxed);

 	return set_timers;
 }
@@ -151,7 +151,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;

 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -193,7 +193,7 @@ cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 		return -ENOENT;

 	bkt = (struct cnxk_tim_bkt *)evtim->impl_opaque[1];
-	sema = __atomic_load_n(&bkt->w1, rte_memory_order_acquire);
+	sema = rte_atomic_load_explicit(&bkt->w1, rte_memory_order_acquire);
 	if (cnxk_tim_bkt_get_hbt(sema) || !cnxk_tim_bkt_get_nent(sema))
 		return -ENOENT;

diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..e52eadbc08 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -23,19 +23,19 @@ cnxk_tim_bkt_fetch_rem(uint64_t w1)
 static inline int16_t
 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&bktp->chunk_remainder, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline uint8_t
@@ -56,20 +56,20 @@ cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
 	/* Clear everything except lock. */
 	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-				  __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+					     rte_memory_order_acquire);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA, rte_memory_order_relaxed);
 }

 static inline uint64_t
@@ -77,19 +77,19 @@ cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
 {
 	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_release);
 }

 static inline void
 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_relaxed);
 }

 static inline uint32_t
@@ -102,19 +102,19 @@ cnxk_tim_bkt_get_nent(uint64_t w1)
 static inline void
 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_release);
 }

 static inline uint64_t
@@ -123,7 +123,7 @@ cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 	const uint64_t v =
 		~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL) & v;
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel) & v;
 }

 static inline uint64_t
@@ -273,8 +273,8 @@ cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -356,8 +356,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -385,8 +385,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 			     : [crem] "r"(&bkt->w1)
 			     : "memory");
 #else
-		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
-		       0)
+		while (rte_atomic_load_explicit((int64_t __rte_atomic *)&bkt->w1,
+						rte_memory_order_relaxed) < 0)
 			;
 #endif
 		goto __retry;
@@ -408,15 +408,14 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 		*chunk = *pent;
 		if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
 			do {
-				lock_sema = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				lock_sema = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
 		}
-		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+		rte_atomic_thread_fence(rte_memory_order_acquire);
 		mirr_bkt->current_chunk = (uintptr_t)chunk;
-		__atomic_store_n(&bkt->chunk_remainder,
-				 tim_ring->nb_chunk_slots - 1,
-				 __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1,
+					  rte_memory_order_release);
 	} else {
 		chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
 		chunk += tim_ring->nb_chunk_slots - rem;
@@ -489,8 +488,8 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -521,7 +520,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 			     : [lock] "r"(&bkt->lock)
 			     : "memory");
 #else
-		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
+		while (rte_atomic_load_explicit(&bkt->lock, rte_memory_order_relaxed))
 			;
 #endif
 		goto __retry;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 0e0d728ba4..3592344e04 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -33,7 +33,8 @@ cnxk_sso_hws_swtag_desched(uint32_t tag, uint8_t new_tt, uint16_t grp,
 	uint64_t val;

 	val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
-	__atomic_store_n((uint64_t *)swtag_desched_op, val, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit((uint64_t __rte_atomic *)swtag_desched_op, val,
+				  rte_memory_order_release);
 }

 static __rte_always_inline void
diff --git a/drivers/net/cnxk/cn9k_ethdev.h b/drivers/net/cnxk/cn9k_ethdev.h
index 4933954c33..c0e649655d 100644
--- a/drivers/net/cnxk/cn9k_ethdev.h
+++ b/drivers/net/cnxk/cn9k_ethdev.h
@@ -11,7 +11,7 @@
 struct cn9k_eth_txq {
 	uint64_t send_hdr_w0;
 	int64_t fc_cache_pkts;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	void *lmt_addr;
 	rte_iova_t io_addr;
 	uint64_t lso_tun_fmt;
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 02/22] common/cnxk: implement SSO HW info
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
                       ` (20 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  6 ++-
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 34 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };

+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}

-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;

 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }

+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }

+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;

-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }

@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}

-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}

 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}

+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"

 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256

 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);

+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 877333b80c..de748ac409 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -516,6 +516,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@

 #include <ethdev_driver.h>

-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>

-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 1fe499308f..b6c4896c69 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;

 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index b8395aa314..4f0eab8acb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index dbacb6de95..f47b1276d3 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -81,7 +83,7 @@ static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
 	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
-	       ROC_SSO_XAE_PER_XAQ;
+		ws->xae_waes;
 }

 static inline void
@@ -399,7 +401,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;

 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */

-#include "roc_npa.h"
+#include "roc_api.h"

 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;

 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 982bbb6a9b..904a9b022d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@

 #include "cnxk_eventdev_dp.h"

-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"

 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a4615c1356..311de3d92b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>

+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"

diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 74a6da5070..27a4dfb490 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@

 #include <math.h>

-#include "roc_npa.h"
+#include "roc_api.h"

 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index db31f91818..5e96f6f188 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */

+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"

diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>

-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"

 struct pwords {
 	uint64_t u[5];
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 03/22] event/cnxk: add CN20K specific device probe
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 02/22] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 04/22] event/cnxk: add CN20K device config pbhagavatula
                       ` (19 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  3 +
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 123 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index fa4822d928..c906262012 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -247,6 +247,9 @@ New Features
   Added ability for node to advertise and update multiple xstat counters,
   that can be retrieved using ``rte_graph_cluster_stats_get``.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
 
 Removed Items
 -------------
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 2c1060ad87..7de78eeba0 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -325,6 +325,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 04/22] event/cnxk: add CN20K device config
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 02/22] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
                       ` (18 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 05/22] event/cnxk: add CN20k event queue configuration
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (2 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 04/22] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
                       ` (17 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index b6c4896c69..25ba7e4133 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1319,7 +1319,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 904a9b022d..ba08fa2173 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 06/22] event/cnxk: add CN20K event port configuration
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (3 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                       ` (16 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  61 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 173 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  26 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 274 insertions(+), 53 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 25ba7e4133..82d973a420 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -219,12 +197,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -571,18 +549,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -665,13 +632,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..d275cebe52 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,86 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +139,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +241,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..5b6c558d5a
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	uint16_t xae_waes;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ba08fa2173..4066497e6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 311de3d92b..7a3262bcff 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 07/22] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (4 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                       ` (15 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 384 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d275cebe52..1f5fd31753 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -107,6 +108,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -264,8 +280,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..c7de493681
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
+	       ws->xae_waes;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
+
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 7de78eeba0..ee27d38115 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -328,6 +328,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 08/22] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (5 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
                       ` (14 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 1f5fd31753..6ef30fa45a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -113,11 +113,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 09/22] event/cnxk: add CN20K event port quiesce
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (6 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
                       ` (13 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6ef30fa45a..4e9f6cedbc 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -207,6 +207,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -264,6 +323,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 10/22] event/cnxk: add CN20K event port profile switch
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (7 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
                       ` (12 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 4e9f6cedbc..9b7ccf690c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -123,6 +123,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2dcde0b444..2c723523d2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -383,6 +383,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 11/22] event/cnxk: add CN20K event port preschedule
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (8 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 12/22] event/cnxk: add CN20K device start pbhagavatula
                       ` (11 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 9b7ccf690c..34636c77ce 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -124,6 +124,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2c723523d2..ebfe863bc5 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -394,6 +394,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 12/22] event/cnxk: add CN20K device start
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (9 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
                       ` (10 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 82d973a420..087560f43f 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -153,83 +153,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -707,24 +630,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -736,9 +641,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -759,8 +663,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 34636c77ce..90902bce40 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -86,6 +86,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -314,6 +369,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -332,6 +406,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 13/22] event/cnxk: add CN20K device stop and close
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (10 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 12/22] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                       ` (9 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 90902bce40..47a48d0ff2 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -388,6 +388,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -408,6 +427,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 14/22] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (11 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
                       ` (8 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index a9258d2abc..f4da07c596 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 47a48d0ff2..8037c0229a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -407,6 +407,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -426,9 +432,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7a3262bcff..8f3d0982e9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 15/22] event/cnxk: support CN20K Rx adapter
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (12 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                       ` (7 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 8037c0229a..bb6a6f94e0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -413,6 +414,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -432,6 +544,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -508,4 +626,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 5b6c558d5a..7a6363a89e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
 	int64_t __rte_atomic *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 16/22] event/cnxk: support CN20K Rx adapter fast path
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (13 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
                       ` (6 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index bb6a6f94e0..aa2f32e005 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -164,21 +167,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -285,7 +391,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -311,7 +418,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index ebfe863bc5..53daf3b4b0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -423,57 +423,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index ee27d38115..2f7fe0dc47 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -330,6 +330,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 17/22] event/cnxk: support CN20K Tx adapter
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (14 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                       ` (5 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index aa2f32e005..5751a391be 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -167,6 +168,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -633,6 +663,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -658,6 +777,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 7a6363a89e..8ea2878fa5 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -25,6 +25,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t grp_base;
 	uint16_t xae_waes;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 18/22] event/cnxk: support CN20K Tx adapter fast path
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (15 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 19/22] common/cnxk: add SSO event aggregator pbhagavatula
                       ` (4 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 +++
 drivers/event/cnxk/cn20k_tx_worker.h         | 176 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 561 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 5751a391be..3c95247499 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -252,6 +255,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -284,6 +300,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -298,6 +320,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..c8ab560b0e 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,180 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 2f7fe0dc47..25ac26dc30 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -368,9 +368,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 19/22] common/cnxk: add SSO event aggregator
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (16 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
                       ` (3 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index de748ac409..14ee6031e2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -500,6 +500,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 20/22] event/cnxk: add Rx/Tx event vector support
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (17 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 19/22] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 21/22] common/cnxk: update timer base code pbhagavatula
                       ` (2 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 3c95247499..e525ab2a9c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -74,6 +74,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -594,7 +595,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -640,6 +642,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -656,7 +808,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -689,7 +841,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -703,7 +877,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -806,6 +981,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index c8ab560b0e..b09d845b09 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -139,10 +139,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -150,6 +198,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 4066497e6b..33b3538753 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 21/22] common/cnxk: update timer base code
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (18 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22  8:46     ` [PATCH v3 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 14ee6031e2..e7381ae8b2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -528,6 +528,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index f4c61dfb44..c5b3d67eb8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v3 22/22] event/cnxk: add CN20K timer adapter
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (19 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 21/22] common/cnxk: update timer base code pbhagavatula
@ 2024-10-22  8:46     ` pbhagavatula
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22  8:46 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  82 +++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 201 +++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index e525ab2a9c..8c02f02acf 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -956,6 +956,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -990,6 +997,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1067,4 +1076,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 27a4dfb490..994d1d1090 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index c5b3d67eb8..114a89ee5a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5e96f6f188..42d376d375 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -77,6 +68,24 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
+
+	return index;
+}
+
 #define FP(_name, _f3, _f2, _f1, _flags)                                       \
 	uint16_t __rte_noinline cnxk_tim_arm_burst_##_name(                    \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -132,6 +141,29 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -174,6 +206,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = (uint64_t __rte_atomic *)&tim[i]->impl_opaque[1];
+		if (!rte_atomic_compare_exchange_strong_explicit(status, (uint64_t *)&tim[i], 0,
+								 rte_memory_order_release,
+								 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index e52eadbc08..be6744db51 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -573,6 +580,200 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t __rte_atomic *status;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = (uint64_t __rte_atomic *)&tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = (uint64_t __rte_atomic *)&tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic
+							      *)&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = (uint64_t __rte_atomic *)&tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic *)&tim[cnt + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* RE: [EXTERNAL] Re: [PATCH 01/20] common/cnxk: implement SSO HW info
  2024-10-22  1:52 ` [PATCH 01/20] common/cnxk: implement SSO HW info Stephen Hemminger
@ 2024-10-22  8:53   ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 181+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2024-10-22  8:53 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Jerin Jacob, Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
	Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Harman Kalra,
	Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj, Shijith Thotton,
	dev

> On Thu, 3 Oct 2024 18:52:18 +0530
> <pbhagavatula@marvell.com> wrote:
> 
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Add SSO HW info mbox to get hardware capabilities, and reuse
> > them instead of depending on hardcoded values.
> > Remove redundant includes.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> 
> You need to fix direct use of __atomic in this series.

Ack, I will cleanup the driver.

^ permalink raw reply	[flat|nested] 181+ messages in thread

* RE: [EXTERNAL] Re: [PATCH 07/20] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-22  1:49   ` Stephen Hemminger
@ 2024-10-22  8:54     ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 181+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2024-10-22  8:54 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Jerin Jacob, Shijith Thotton, dev

> On Thu, 3 Oct 2024 18:52:24 +0530
> <pbhagavatula@marvell.com> wrote:
> 
> > +static __rte_always_inline uint16_t
> > +cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event
> *ev, const uint32_t flags)
> > +{
> > +	union {
> > +		__uint128_t get_work;
> > +		uint64_t u64[2];
> > +	} gw;
> > +
> > +	gw.get_work = ws->gw_wdata;
> > +#if defined(RTE_ARCH_ARM64)
> > +#if defined(__clang__)
> > +	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
> > +	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
> > +#if defined(RTE_ARM_USE_WFE)
> > +	plt_write64(gw.u64[0], ws->base +
> SSOW_LF_GWS_OP_GET_WORK0);
> > +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> > +		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
> > +		     "		tbz %[x0], %[pend_gw], done%=	\n"
> > +		     "		sevl					\n"
> > +		     "rty%=:	wfe					\n"
> > +		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
> > +		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
> > +		     "done%=:						\n"
> > +		     "		dmb ld					\n"
> > +		     : [x0] "+r" (x0), [x1] "+r" (x1)
> > +		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
> > +		       [pend_gw]
> "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
> > +		     : "memory");
> > +#else
> > +	asm volatile(".arch armv8-a+lse\n"
> > +		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
> > +		     : [x0] "+r" (x0), [x1] "+r" (x1)
> > +		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
> > +		     : "memory");
> > +#endif
> > +	gw.u64[0] = x0;
> > +	gw.u64[1] = x1;
> > +#else
> > +#if defined(RTE_ARM_USE_WFE)
> > +	plt_write64(gw.u64[0], ws->base +
> SSOW_LF_GWS_OP_GET_WORK0);
> > +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> > +		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
> > +		     "		tbz %[wdata], %[pend_gw], done%=	\n"
> > +		     "		sevl					\n"
> > +		     "rty%=:	wfe					\n"
> > +		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
> > +		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
> > +		     "done%=:						\n"
> > +		     "		dmb ld					\n"
> > +		     : [wdata] "=&r"(gw.get_work)
> > +		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
> > +		       [pend_gw]
> "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
> > +		     : "memory");
> > +#else
> > +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> > +		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata],
> [%[gw_loc]]\n"
> > +		     : [wdata] "+r"(gw.get_work)
> > +		     : [gw_loc] "r"(ws->base +
> SSOW_LF_GWS_OP_GET_WORK0)
> > +		     : "memory");
> > +#endif
> > +#endif
> 
> This generates lots of warnings like:
> 
> WARNING:QUOTED_WHITESPACE_BEFORE_NEWLINE: unnecessary
> whitespace before a quoted newline
> #131: FILE: drivers/event/cnxk/cn20k_worker.h:39:
> +		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
> 
> But the bigger problem is burying assembly in a driver is a bad idea.
> The use of ARM primitives should be done in EAL, not down in the PMD.

This sequence is very specific to the SSO co-processor in CNXK. 
I don’t think we can move it to EAL.

> 


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 01/22] event/cnxk: use stdatomic API
  2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
                       ` (20 preceding siblings ...)
  2024-10-22  8:46     ` [PATCH v3 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-22 19:34     ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 02/22] common/cnxk: implement SSO HW info pbhagavatula
                         ` (22 more replies)
  21 siblings, 23 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton,
	Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Harman Kalra
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: series-33602 ("event/cnxk: fix getwork write data on reconfig")

v2 Changes:
- Rebase and remove single dequeue and enqueue functions.
v3 Changes:
- Remove __atomic builtins.
v4 Changes:
- Rebase onto next-event tree.

 drivers/event/cnxk/cn10k_eventdev.c         |  6 +--
 drivers/event/cnxk/cn10k_eventdev.h         |  4 +-
 drivers/event/cnxk/cn10k_tx_worker.h        |  7 ++-
 drivers/event/cnxk/cn10k_worker.c           | 15 +++---
 drivers/event/cnxk/cn10k_worker.h           |  2 +-
 drivers/event/cnxk/cn9k_eventdev.c          |  8 +--
 drivers/event/cnxk/cn9k_worker.h            | 18 ++++---
 drivers/event/cnxk/cnxk_eventdev.h          |  4 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c | 60 ++++++++++-----------
 drivers/event/cnxk/cnxk_tim_evdev.c         |  4 +-
 drivers/event/cnxk/cnxk_tim_evdev.h         | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.c        | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.h        | 57 ++++++++++----------
 drivers/event/cnxk/cnxk_worker.h            |  3 +-
 drivers/net/cnxk/cn9k_ethdev.h              |  2 +-
 15 files changed, 108 insertions(+), 102 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4edac33a84..4a2c88c8c6 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -94,9 +94,9 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	uint64_t val;

 	ws->grp_base = grp_base;
-	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 	ws->xaq_lmt = dev->xaq_lmt;
-	ws->fc_cache_space = dev->fc_cache_space;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);

@@ -768,7 +768,7 @@ cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 	for (i = 0; i < dev->nb_event_ports; i++) {
 		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
 		ws->xaq_lmt = dev->xaq_lmt;
-		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 		ws->tstamp = dev->tstamp;
 		if (lookup_mem)
 			ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..b8395aa314 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -19,8 +19,8 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	uint64_t meta_aura;
 	/* Add Work Fastpath data */
-	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
-	int64_t *fc_cache_space;
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 0695ea23e1..19cb2e22e5 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -51,7 +51,9 @@ cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -60,7 +62,8 @@ static __rte_always_inline int32_t
 cn10k_sso_sq_depth(const struct cn10k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index c49138316c..06ad7437d5 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -16,7 +16,7 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -80,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
 	       ROC_SSO_XAE_PER_XAQ;
 }

@@ -90,19 +90,20 @@ sso_lmt_aw_wait_fc(struct cn10k_sso_hws *ws, int64_t req)
 	int64_t cached, refill;

 retry:
-	while (__atomic_load_n(ws->fc_cache_space, __ATOMIC_RELAXED) < 0)
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
 		;

-	cached = __atomic_fetch_sub(ws->fc_cache_space, req, __ATOMIC_ACQUIRE) - req;
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
 	/* Check if there is enough space, else update and retry. */
 	if (cached < 0) {
 		/* Check if we have space else retry. */
 		do {
 			refill = sso_read_xaq_space(ws);
 		} while (refill <= 0);
-		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill,
-					  0, __ATOMIC_RELEASE,
-					  __ATOMIC_RELAXED);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
 		goto retry;
 	}
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 5d3394508e..954dee5a2a 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -311,7 +311,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		roc_load_pair(gw.u64[0], gw.u64[1],
 			      ws->base + SSOW_LF_GWS_WQE0);
 	} while (gw.u64[0] & BIT_ULL(63));
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 #endif
 	ws->gw_rdata = gw.u64[0];
 	if (gw.u64[1])
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index b176044aa5..05e237c005 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -74,7 +74,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	if (dev->dual_ws) {
 		dws = hws;
 		dws->grp_base = grp_base;
-		dws->fc_mem = (uint64_t *)dev->fc_iova;
+		dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		dws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, dws->base[0] + SSOW_LF_GWS_NW_TIM);
@@ -82,7 +82,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	} else {
 		ws = hws;
 		ws->grp_base = grp_base;
-		ws->fc_mem = (uint64_t *)dev->fc_iova;
+		ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		ws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
@@ -822,14 +822,14 @@ cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 			struct cn9k_sso_hws_dual *dws =
 				event_dev->data->ports[i];
 			dws->xaq_lmt = dev->xaq_lmt;
-			dws->fc_mem = (uint64_t *)dev->fc_iova;
+			dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			dws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				dws->lookup_mem = lookup_mem;
 		} else {
 			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
 			ws->xaq_lmt = dev->xaq_lmt;
-			ws->fc_mem = (uint64_t *)dev->fc_iova;
+			ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			ws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 064cdfe94a..71caf45574 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -28,7 +28,7 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -71,7 +71,7 @@ cn9k_sso_hws_new_event_wait(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (ws->xaq_lmt <= __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED))
+	while (ws->xaq_lmt <= rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -93,7 +93,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(ws->base);
 		cn9k_sso_hws_new_event_wait(ws, ev);
 	}
@@ -110,7 +110,7 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (dws->xaq_lmt <= *dws->fc_mem)
 		return 0;

@@ -128,7 +128,7 @@ cn9k_sso_hws_dual_new_event_wait(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (dws->xaq_lmt <= __atomic_load_n(dws->fc_mem, __ATOMIC_RELAXED))
+	while (dws->xaq_lmt <= rte_atomic_load_explicit(dws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -151,7 +151,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(base);
 		cn9k_sso_hws_dual_new_event_wait(dws, ev);
 	}
@@ -571,7 +571,9 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -740,7 +742,7 @@ static __rte_always_inline int32_t
 cn9k_sso_sq_depth(const struct cn9k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit(txq->fc_mem, rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..982bbb6a9b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,7 +136,7 @@ struct __rte_cache_aligned cn9k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
@@ -154,7 +154,7 @@ struct __rte_cache_aligned cn9k_sso_hws_dual {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a4615c1356 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -63,7 +63,7 @@ seqn_list_update(int val)
 		return -1;

 	seqn_list[seqn_list_index++] = val;
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	return 0;
 }

@@ -82,7 +82,7 @@ seqn_list_check(int limit)
 }

 struct test_core_param {
-	uint32_t *total_events;
+	uint32_t __rte_atomic *total_events;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port;
 	uint8_t sched_type;
@@ -540,13 +540,13 @@ static int
 worker_multi_port_fn(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;
 	int ret;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;
@@ -554,30 +554,30 @@ worker_multi_port_fn(void *arg)
 		ret = validate_event(&ev);
 		RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event");
 		rte_pktmbuf_free(ev.mbuf);
-		__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 	}

 	return 0;
 }

 static inline int
-wait_workers_to_join(const uint32_t *count)
+wait_workers_to_join(const uint32_t __rte_atomic *count)
 {
 	uint64_t cycles, print_cycles;

 	cycles = rte_get_timer_cycles();
 	print_cycles = cycles;
-	while (__atomic_load_n(count, __ATOMIC_RELAXED)) {
+	while (rte_atomic_load_explicit(count, rte_memory_order_relaxed)) {
 		uint64_t new_cycles = rte_get_timer_cycles();

 		if (new_cycles - print_cycles > rte_get_timer_hz()) {
 			plt_info("Events %d",
-				 __atomic_load_n(count, __ATOMIC_RELAXED));
+				 rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			print_cycles = new_cycles;
 		}
 		if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) {
 			plt_err("No schedules for seconds, deadlock (%d)",
-				__atomic_load_n(count, __ATOMIC_RELAXED));
+				rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			rte_event_dev_dump(evdev, stdout);
 			cycles = new_cycles;
 			return -1;
@@ -593,7 +593,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 			int (*worker_thread)(void *), uint32_t total_events,
 			uint8_t nb_workers, uint8_t sched_type)
 {
-	uint32_t atomic_total_events;
+	uint32_t __rte_atomic atomic_total_events;
 	struct test_core_param *param;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port = 0;
@@ -603,7 +603,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 	if (!nb_workers)
 		return 0;

-	__atomic_store_n(&atomic_total_events, total_events, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&atomic_total_events, total_events, rte_memory_order_relaxed);
 	seqn_list_init();

 	param = malloc(sizeof(struct test_core_param) * nb_workers);
@@ -640,7 +640,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		param[port].sched_type = sched_type;
 		param[port].port = port;
 		param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
 		if (w_lcore == RTE_MAX_LCORE) {
 			plt_err("Failed to get next available lcore");
@@ -651,7 +651,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
 	}

-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	ret = wait_workers_to_join(&atomic_total_events);
 	free(param);

@@ -890,13 +890,13 @@ worker_flow_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -916,8 +916,8 @@ worker_flow_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1046,13 +1046,13 @@ worker_group_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -1072,8 +1072,8 @@ worker_group_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1205,19 +1205,19 @@ static int
 worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.sub_event_type == MAX_STAGES) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.sub_event_type++;
@@ -1284,16 +1284,16 @@ worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
@@ -1329,16 +1329,16 @@ worker_mixed_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* Last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..74a6da5070 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -358,7 +358,7 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
 		tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;

 	stats->evtim_exp_count =
-		__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
+		rte_atomic_load_explicit(&tim_ring->arm_cnt, rte_memory_order_relaxed);
 	stats->ev_enq_count = stats->evtim_exp_count;
 	stats->adapter_tick_count =
 		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div);
@@ -370,7 +370,7 @@ cnxk_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
 {
 	struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;

-	__atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tim_ring->arm_cnt, 0, rte_memory_order_relaxed);
 	return 0;
 }

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..f4c61dfb44 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -108,15 +108,15 @@ struct cnxk_tim_evdev {
 struct cnxk_tim_bkt {
 	uint64_t first_chunk;
 	union {
-		uint64_t w1;
+		uint64_t __rte_atomic w1;
 		struct {
-			uint32_t nb_entry;
+			uint32_t __rte_atomic nb_entry;
 			uint8_t sbt : 1;
 			uint8_t hbt : 1;
 			uint8_t bsk : 1;
 			uint8_t rsvd : 5;
-			uint8_t lock;
-			int16_t chunk_remainder;
+			uint8_t __rte_atomic lock;
+			int16_t __rte_atomic chunk_remainder;
 		};
 	};
 	uint64_t current_chunk;
@@ -134,7 +134,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	struct rte_reciprocal_u64 fast_div;
 	struct rte_reciprocal_u64 fast_bkt;
 	uint64_t tck_int;
-	uint64_t arm_cnt;
+	uint64_t __rte_atomic arm_cnt;
 	uintptr_t base;
 	uint8_t prod_type_sp;
 	uint8_t enable_stats;
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..db31f91818 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -70,7 +70,7 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);

 	return index;
 }
@@ -124,8 +124,8 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, set_timers,
+					      rte_memory_order_relaxed);

 	return set_timers;
 }
@@ -151,7 +151,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;

 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -193,7 +193,7 @@ cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 		return -ENOENT;

 	bkt = (struct cnxk_tim_bkt *)evtim->impl_opaque[1];
-	sema = __atomic_load_n(&bkt->w1, rte_memory_order_acquire);
+	sema = rte_atomic_load_explicit(&bkt->w1, rte_memory_order_acquire);
 	if (cnxk_tim_bkt_get_hbt(sema) || !cnxk_tim_bkt_get_nent(sema))
 		return -ENOENT;

diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..e52eadbc08 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -23,19 +23,19 @@ cnxk_tim_bkt_fetch_rem(uint64_t w1)
 static inline int16_t
 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&bktp->chunk_remainder, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline uint8_t
@@ -56,20 +56,20 @@ cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
 	/* Clear everything except lock. */
 	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-				  __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+					     rte_memory_order_acquire);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA, rte_memory_order_relaxed);
 }

 static inline uint64_t
@@ -77,19 +77,19 @@ cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
 {
 	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_release);
 }

 static inline void
 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_relaxed);
 }

 static inline uint32_t
@@ -102,19 +102,19 @@ cnxk_tim_bkt_get_nent(uint64_t w1)
 static inline void
 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_release);
 }

 static inline uint64_t
@@ -123,7 +123,7 @@ cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 	const uint64_t v =
 		~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL) & v;
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel) & v;
 }

 static inline uint64_t
@@ -273,8 +273,8 @@ cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -356,8 +356,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -385,8 +385,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 			     : [crem] "r"(&bkt->w1)
 			     : "memory");
 #else
-		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
-		       0)
+		while (rte_atomic_load_explicit((int64_t __rte_atomic *)&bkt->w1,
+						rte_memory_order_relaxed) < 0)
 			;
 #endif
 		goto __retry;
@@ -408,15 +408,14 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 		*chunk = *pent;
 		if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
 			do {
-				lock_sema = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				lock_sema = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
 		}
-		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+		rte_atomic_thread_fence(rte_memory_order_acquire);
 		mirr_bkt->current_chunk = (uintptr_t)chunk;
-		__atomic_store_n(&bkt->chunk_remainder,
-				 tim_ring->nb_chunk_slots - 1,
-				 __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1,
+					  rte_memory_order_release);
 	} else {
 		chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
 		chunk += tim_ring->nb_chunk_slots - rem;
@@ -489,8 +488,8 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -521,7 +520,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 			     : [lock] "r"(&bkt->lock)
 			     : "memory");
 #else
-		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
+		while (rte_atomic_load_explicit(&bkt->lock, rte_memory_order_relaxed))
 			;
 #endif
 		goto __retry;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 0e0d728ba4..3592344e04 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -33,7 +33,8 @@ cnxk_sso_hws_swtag_desched(uint32_t tag, uint8_t new_tt, uint16_t grp,
 	uint64_t val;

 	val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
-	__atomic_store_n((uint64_t *)swtag_desched_op, val, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit((uint64_t __rte_atomic *)swtag_desched_op, val,
+				  rte_memory_order_release);
 }

 static __rte_always_inline void
diff --git a/drivers/net/cnxk/cn9k_ethdev.h b/drivers/net/cnxk/cn9k_ethdev.h
index 4933954c33..c0e649655d 100644
--- a/drivers/net/cnxk/cn9k_ethdev.h
+++ b/drivers/net/cnxk/cn9k_ethdev.h
@@ -11,7 +11,7 @@
 struct cn9k_eth_txq {
 	uint64_t send_hdr_w0;
 	int64_t fc_cache_pkts;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	void *lmt_addr;
 	rte_iova_t io_addr;
 	uint64_t lso_tun_fmt;
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 02/22] common/cnxk: implement SSO HW info
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
                         ` (21 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Ankur Dwivedi,
	Anoob Joseph, Tejasree Kondoj, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  6 ++-
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 34 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };
 
+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}
 
-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;
 
 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }
 
+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }
 
+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;
 
-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }
 
@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}
 
-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}
 
 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}
 
+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"
 
 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256
 
 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
 
+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 877333b80c..de748ac409 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -516,6 +516,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@
 
 #include <ethdev_driver.h>
 
-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>
 
-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4a2c88c8c6..c7af0fac11 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
 
 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index b8395aa314..4f0eab8acb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 06ad7437d5..80077ec8a1 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -81,7 +83,7 @@ static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
 	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
-	       ROC_SSO_XAE_PER_XAQ;
+		ws->xae_waes;
 }
 
 static inline void
@@ -394,7 +396,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;
 
 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;
 
 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 982bbb6a9b..904a9b022d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@
 
 #include "cnxk_eventdev_dp.h"
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"
 
 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a4615c1356..311de3d92b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 74a6da5070..27a4dfb490 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@
 
 #include <math.h>
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index db31f91818..5e96f6f188 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"
 
diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 struct pwords {
 	uint64_t u[5];
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 03/22] event/cnxk: add CN20K specific device probe
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 02/22] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 04/22] event/cnxk: add CN20K device config pbhagavatula
                         ` (20 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton,
	Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Harman Kalra, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  3 +
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 123 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 5461798970..16faee186c 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -247,6 +247,9 @@ New Features
   Added ability for node to advertise and update multiple xstat counters,
   that can be retrieved using ``rte_graph_cluster_stats_get``.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
 
 Removed Items
 -------------
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 6757af74bf..21cd5c5ae6 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -229,6 +229,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 04/22] event/cnxk: add CN20K device config
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 02/22] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
                         ` (19 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 05/22] event/cnxk: add CN20k event queue configuration
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (2 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 04/22] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
                         ` (18 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index c7af0fac11..49805dd91d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1251,7 +1251,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 904a9b022d..ba08fa2173 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 06/22] event/cnxk: add CN20K event port configuration
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (3 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                         ` (17 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  63 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 174 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  26 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 276 insertions(+), 54 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 49805dd91d..43bc6c0bac 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -99,7 +77,7 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -220,12 +198,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -504,18 +482,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -598,13 +565,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..611906a4f0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,87 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +140,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +242,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..5b6c558d5a
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	uint16_t xae_waes;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ba08fa2173..4066497e6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 311de3d92b..7a3262bcff 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 07/22] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (4 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                         ` (16 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton, Anatoly Burakov; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 384 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 611906a4f0..a5dd03de6e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -108,6 +109,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -265,8 +281,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..c7de493681
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
+	       ws->xae_waes;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
+
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 21cd5c5ae6..d0dc2320e1 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -232,6 +232,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 08/22] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (5 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
                         ` (15 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a5dd03de6e..d1668a00c1 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -114,11 +114,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 09/22] event/cnxk: add CN20K event port quiesce
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (6 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
                         ` (14 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d1668a00c1..56e3eb87fb 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -208,6 +208,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -265,6 +324,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 10/22] event/cnxk: add CN20K event port profile switch
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (7 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
                         ` (13 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 56e3eb87fb..53b0b43199 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -124,6 +124,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2dcde0b444..2c723523d2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -383,6 +383,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 11/22] event/cnxk: add CN20K event port preschedule
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (8 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 12/22] event/cnxk: add CN20K device start pbhagavatula
                         ` (12 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 53b0b43199..a788eeed63 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -125,6 +125,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2c723523d2..ebfe863bc5 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -394,6 +394,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 12/22] event/cnxk: add CN20K device start
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (9 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
                         ` (11 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 43bc6c0bac..f2e591f547 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -154,83 +154,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -640,24 +563,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -669,9 +574,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -692,8 +596,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a788eeed63..69c593ed60 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -87,6 +87,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -315,6 +370,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -333,6 +407,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 13/22] event/cnxk: add CN20K device stop and close
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (10 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 12/22] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                         ` (10 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 69c593ed60..6195b29705 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -389,6 +389,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -409,6 +428,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 14/22] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (11 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
                         ` (9 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index a9258d2abc..f4da07c596 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6195b29705..793098bd61 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -408,6 +408,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -427,9 +433,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7a3262bcff..8f3d0982e9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 15/22] event/cnxk: support CN20K Rx adapter
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (12 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:34       ` [PATCH v4 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                         ` (8 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 793098bd61..602fbd6359 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -414,6 +415,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -433,6 +545,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -509,4 +627,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 5b6c558d5a..7a6363a89e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
 	int64_t __rte_atomic *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 16/22] event/cnxk: support CN20K Rx adapter fast path
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (13 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-22 19:34       ` pbhagavatula
  2024-10-22 19:35       ` [PATCH v4 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
                         ` (7 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:34 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 602fbd6359..408014036a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -165,21 +168,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -286,7 +392,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -312,7 +419,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index ebfe863bc5..53daf3b4b0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -423,57 +423,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index d0dc2320e1..a2bafab268 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -234,6 +234,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 17/22] event/cnxk: support CN20K Tx adapter
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (14 preceding siblings ...)
  2024-10-22 19:34       ` [PATCH v4 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-22 19:35       ` pbhagavatula
  2024-10-22 19:35       ` [PATCH v4 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                         ` (6 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:35 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 408014036a..509c6ea630 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -168,6 +169,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -634,6 +664,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -659,6 +778,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 7a6363a89e..8ea2878fa5 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -25,6 +25,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t grp_base;
 	uint16_t xae_waes;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 18/22] event/cnxk: support CN20K Tx adapter fast path
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (15 preceding siblings ...)
  2024-10-22 19:35       ` [PATCH v4 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-22 19:35       ` pbhagavatula
  2024-10-22 19:35       ` [PATCH v4 19/22] common/cnxk: add SSO event aggregator pbhagavatula
                         ` (5 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:35 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 +++
 drivers/event/cnxk/cn20k_tx_worker.h         | 176 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 561 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 509c6ea630..5d49a5e5c6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -253,6 +256,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -285,6 +301,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -299,6 +321,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..c8ab560b0e 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,180 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index a2bafab268..8aaf8116f7 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -272,9 +272,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 19/22] common/cnxk: add SSO event aggregator
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (16 preceding siblings ...)
  2024-10-22 19:35       ` [PATCH v4 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-22 19:35       ` pbhagavatula
  2024-10-22 19:35       ` [PATCH v4 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
                         ` (4 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:35 UTC (permalink / raw)
  To: jerinj, stephen, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index de748ac409..14ee6031e2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -500,6 +500,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 20/22] event/cnxk: add Rx/Tx event vector support
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (17 preceding siblings ...)
  2024-10-22 19:35       ` [PATCH v4 19/22] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-22 19:35       ` pbhagavatula
  2024-10-22 19:35       ` [PATCH v4 21/22] common/cnxk: update timer base code pbhagavatula
                         ` (3 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:35 UTC (permalink / raw)
  To: jerinj, stephen, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 5d49a5e5c6..57e15b6d8c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -75,6 +75,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
 	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -595,7 +596,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -641,6 +643,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -657,7 +809,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -690,7 +842,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -704,7 +878,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -807,6 +982,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index c8ab560b0e..b09d845b09 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -139,10 +139,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -150,6 +198,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 4066497e6b..33b3538753 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 21/22] common/cnxk: update timer base code
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (18 preceding siblings ...)
  2024-10-22 19:35       ` [PATCH v4 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-22 19:35       ` pbhagavatula
  2024-10-22 19:35       ` [PATCH v4 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
                         ` (2 subsequent siblings)
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:35 UTC (permalink / raw)
  To: jerinj, stephen, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 14ee6031e2..e7381ae8b2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -528,6 +528,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index f4c61dfb44..c5b3d67eb8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v4 22/22] event/cnxk: add CN20K timer adapter
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (19 preceding siblings ...)
  2024-10-22 19:35       ` [PATCH v4 21/22] common/cnxk: update timer base code pbhagavatula
@ 2024-10-22 19:35       ` pbhagavatula
  2024-10-25  6:37       ` [PATCH v4 01/22] event/cnxk: use stdatomic API Jerin Jacob
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
  22 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-22 19:35 UTC (permalink / raw)
  To: jerinj, stephen, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  82 +++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 201 +++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 57e15b6d8c..d68700fc05 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -957,6 +957,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -991,6 +998,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1068,4 +1077,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 27a4dfb490..994d1d1090 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index c5b3d67eb8..114a89ee5a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5e96f6f188..42d376d375 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -77,6 +68,24 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
+
+	return index;
+}
+
 #define FP(_name, _f3, _f2, _f1, _flags)                                       \
 	uint16_t __rte_noinline cnxk_tim_arm_burst_##_name(                    \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -132,6 +141,29 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -174,6 +206,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = (uint64_t __rte_atomic *)&tim[i]->impl_opaque[1];
+		if (!rte_atomic_compare_exchange_strong_explicit(status, (uint64_t *)&tim[i], 0,
+								 rte_memory_order_release,
+								 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index e52eadbc08..be6744db51 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -573,6 +580,200 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t __rte_atomic *status;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = (uint64_t __rte_atomic *)&tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = (uint64_t __rte_atomic *)&tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic
+							      *)&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = (uint64_t __rte_atomic *)&tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic *)&tim[cnt + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* Re: [PATCH v4 01/22] event/cnxk: use stdatomic API
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (20 preceding siblings ...)
  2024-10-22 19:35       ` [PATCH v4 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-25  6:37       ` Jerin Jacob
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
  22 siblings, 0 replies; 181+ messages in thread
From: Jerin Jacob @ 2024-10-25  6:37 UTC (permalink / raw)
  To: pbhagavatula
  Cc: jerinj, stephen, Shijith Thotton, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra, dev

On Wed, Oct 23, 2024 at 1:05 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> Depends-on: series-33602 ("event/cnxk: fix getwork write data on reconfig")

Some reason CI not ran due to this depends. Merged this this patch,
Please resend to make CI run.
Furthermore, in the release note update, please cnxk update after DSW PMD.

^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 01/22] event/cnxk: use stdatomic API
  2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
                         ` (21 preceding siblings ...)
  2024-10-25  6:37       ` [PATCH v4 01/22] event/cnxk: use stdatomic API Jerin Jacob
@ 2024-10-25  8:13       ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 02/22] common/cnxk: implement SSO HW info pbhagavatula
                           ` (21 more replies)
  22 siblings, 22 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v2 Changes:
- Rebase and remove single dequeue and enqueue functions.
v3 Changes:
- Remove __atomic builtins.
v4 Changes:
- Rebase onto next-event tree.
v5 Changes:
- Rebase, shuffle release notes order.

 drivers/event/cnxk/cn10k_eventdev.c         |  6 +--
 drivers/event/cnxk/cn10k_eventdev.h         |  4 +-
 drivers/event/cnxk/cn10k_tx_worker.h        |  7 ++-
 drivers/event/cnxk/cn10k_worker.c           | 15 +++---
 drivers/event/cnxk/cn10k_worker.h           |  2 +-
 drivers/event/cnxk/cn9k_eventdev.c          |  8 +--
 drivers/event/cnxk/cn9k_worker.h            | 18 ++++---
 drivers/event/cnxk/cnxk_eventdev.h          |  4 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c | 60 ++++++++++-----------
 drivers/event/cnxk/cnxk_tim_evdev.c         |  4 +-
 drivers/event/cnxk/cnxk_tim_evdev.h         | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.c        | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.h        | 57 ++++++++++----------
 drivers/event/cnxk/cnxk_worker.h            |  3 +-
 drivers/net/cnxk/cn9k_ethdev.h              |  2 +-
 15 files changed, 108 insertions(+), 102 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4edac33a84..4a2c88c8c6 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -94,9 +94,9 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	uint64_t val;

 	ws->grp_base = grp_base;
-	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 	ws->xaq_lmt = dev->xaq_lmt;
-	ws->fc_cache_space = dev->fc_cache_space;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);

@@ -768,7 +768,7 @@ cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 	for (i = 0; i < dev->nb_event_ports; i++) {
 		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
 		ws->xaq_lmt = dev->xaq_lmt;
-		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 		ws->tstamp = dev->tstamp;
 		if (lookup_mem)
 			ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..b8395aa314 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -19,8 +19,8 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	uint64_t meta_aura;
 	/* Add Work Fastpath data */
-	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
-	int64_t *fc_cache_space;
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 0695ea23e1..19cb2e22e5 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -51,7 +51,9 @@ cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -60,7 +62,8 @@ static __rte_always_inline int32_t
 cn10k_sso_sq_depth(const struct cn10k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index c49138316c..06ad7437d5 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -16,7 +16,7 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -80,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
 	       ROC_SSO_XAE_PER_XAQ;
 }

@@ -90,19 +90,20 @@ sso_lmt_aw_wait_fc(struct cn10k_sso_hws *ws, int64_t req)
 	int64_t cached, refill;

 retry:
-	while (__atomic_load_n(ws->fc_cache_space, __ATOMIC_RELAXED) < 0)
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
 		;

-	cached = __atomic_fetch_sub(ws->fc_cache_space, req, __ATOMIC_ACQUIRE) - req;
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
 	/* Check if there is enough space, else update and retry. */
 	if (cached < 0) {
 		/* Check if we have space else retry. */
 		do {
 			refill = sso_read_xaq_space(ws);
 		} while (refill <= 0);
-		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill,
-					  0, __ATOMIC_RELEASE,
-					  __ATOMIC_RELAXED);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
 		goto retry;
 	}
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 5d3394508e..954dee5a2a 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -311,7 +311,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		roc_load_pair(gw.u64[0], gw.u64[1],
 			      ws->base + SSOW_LF_GWS_WQE0);
 	} while (gw.u64[0] & BIT_ULL(63));
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 #endif
 	ws->gw_rdata = gw.u64[0];
 	if (gw.u64[1])
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index b176044aa5..05e237c005 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -74,7 +74,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	if (dev->dual_ws) {
 		dws = hws;
 		dws->grp_base = grp_base;
-		dws->fc_mem = (uint64_t *)dev->fc_iova;
+		dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		dws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, dws->base[0] + SSOW_LF_GWS_NW_TIM);
@@ -82,7 +82,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	} else {
 		ws = hws;
 		ws->grp_base = grp_base;
-		ws->fc_mem = (uint64_t *)dev->fc_iova;
+		ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		ws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
@@ -822,14 +822,14 @@ cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 			struct cn9k_sso_hws_dual *dws =
 				event_dev->data->ports[i];
 			dws->xaq_lmt = dev->xaq_lmt;
-			dws->fc_mem = (uint64_t *)dev->fc_iova;
+			dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			dws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				dws->lookup_mem = lookup_mem;
 		} else {
 			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
 			ws->xaq_lmt = dev->xaq_lmt;
-			ws->fc_mem = (uint64_t *)dev->fc_iova;
+			ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			ws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 064cdfe94a..71caf45574 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -28,7 +28,7 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -71,7 +71,7 @@ cn9k_sso_hws_new_event_wait(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (ws->xaq_lmt <= __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED))
+	while (ws->xaq_lmt <= rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -93,7 +93,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(ws->base);
 		cn9k_sso_hws_new_event_wait(ws, ev);
 	}
@@ -110,7 +110,7 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (dws->xaq_lmt <= *dws->fc_mem)
 		return 0;

@@ -128,7 +128,7 @@ cn9k_sso_hws_dual_new_event_wait(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (dws->xaq_lmt <= __atomic_load_n(dws->fc_mem, __ATOMIC_RELAXED))
+	while (dws->xaq_lmt <= rte_atomic_load_explicit(dws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -151,7 +151,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(base);
 		cn9k_sso_hws_dual_new_event_wait(dws, ev);
 	}
@@ -571,7 +571,9 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -740,7 +742,7 @@ static __rte_always_inline int32_t
 cn9k_sso_sq_depth(const struct cn9k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit(txq->fc_mem, rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..982bbb6a9b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,7 +136,7 @@ struct __rte_cache_aligned cn9k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
@@ -154,7 +154,7 @@ struct __rte_cache_aligned cn9k_sso_hws_dual {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a4615c1356 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -63,7 +63,7 @@ seqn_list_update(int val)
 		return -1;

 	seqn_list[seqn_list_index++] = val;
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	return 0;
 }

@@ -82,7 +82,7 @@ seqn_list_check(int limit)
 }

 struct test_core_param {
-	uint32_t *total_events;
+	uint32_t __rte_atomic *total_events;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port;
 	uint8_t sched_type;
@@ -540,13 +540,13 @@ static int
 worker_multi_port_fn(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;
 	int ret;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;
@@ -554,30 +554,30 @@ worker_multi_port_fn(void *arg)
 		ret = validate_event(&ev);
 		RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event");
 		rte_pktmbuf_free(ev.mbuf);
-		__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 	}

 	return 0;
 }

 static inline int
-wait_workers_to_join(const uint32_t *count)
+wait_workers_to_join(const uint32_t __rte_atomic *count)
 {
 	uint64_t cycles, print_cycles;

 	cycles = rte_get_timer_cycles();
 	print_cycles = cycles;
-	while (__atomic_load_n(count, __ATOMIC_RELAXED)) {
+	while (rte_atomic_load_explicit(count, rte_memory_order_relaxed)) {
 		uint64_t new_cycles = rte_get_timer_cycles();

 		if (new_cycles - print_cycles > rte_get_timer_hz()) {
 			plt_info("Events %d",
-				 __atomic_load_n(count, __ATOMIC_RELAXED));
+				 rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			print_cycles = new_cycles;
 		}
 		if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) {
 			plt_err("No schedules for seconds, deadlock (%d)",
-				__atomic_load_n(count, __ATOMIC_RELAXED));
+				rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			rte_event_dev_dump(evdev, stdout);
 			cycles = new_cycles;
 			return -1;
@@ -593,7 +593,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 			int (*worker_thread)(void *), uint32_t total_events,
 			uint8_t nb_workers, uint8_t sched_type)
 {
-	uint32_t atomic_total_events;
+	uint32_t __rte_atomic atomic_total_events;
 	struct test_core_param *param;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port = 0;
@@ -603,7 +603,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 	if (!nb_workers)
 		return 0;

-	__atomic_store_n(&atomic_total_events, total_events, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&atomic_total_events, total_events, rte_memory_order_relaxed);
 	seqn_list_init();

 	param = malloc(sizeof(struct test_core_param) * nb_workers);
@@ -640,7 +640,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		param[port].sched_type = sched_type;
 		param[port].port = port;
 		param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
 		if (w_lcore == RTE_MAX_LCORE) {
 			plt_err("Failed to get next available lcore");
@@ -651,7 +651,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
 	}

-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	ret = wait_workers_to_join(&atomic_total_events);
 	free(param);

@@ -890,13 +890,13 @@ worker_flow_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -916,8 +916,8 @@ worker_flow_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1046,13 +1046,13 @@ worker_group_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -1072,8 +1072,8 @@ worker_group_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1205,19 +1205,19 @@ static int
 worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.sub_event_type == MAX_STAGES) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.sub_event_type++;
@@ -1284,16 +1284,16 @@ worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
@@ -1329,16 +1329,16 @@ worker_mixed_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* Last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..74a6da5070 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -358,7 +358,7 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
 		tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;

 	stats->evtim_exp_count =
-		__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
+		rte_atomic_load_explicit(&tim_ring->arm_cnt, rte_memory_order_relaxed);
 	stats->ev_enq_count = stats->evtim_exp_count;
 	stats->adapter_tick_count =
 		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div);
@@ -370,7 +370,7 @@ cnxk_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
 {
 	struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;

-	__atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tim_ring->arm_cnt, 0, rte_memory_order_relaxed);
 	return 0;
 }

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..f4c61dfb44 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -108,15 +108,15 @@ struct cnxk_tim_evdev {
 struct cnxk_tim_bkt {
 	uint64_t first_chunk;
 	union {
-		uint64_t w1;
+		uint64_t __rte_atomic w1;
 		struct {
-			uint32_t nb_entry;
+			uint32_t __rte_atomic nb_entry;
 			uint8_t sbt : 1;
 			uint8_t hbt : 1;
 			uint8_t bsk : 1;
 			uint8_t rsvd : 5;
-			uint8_t lock;
-			int16_t chunk_remainder;
+			uint8_t __rte_atomic lock;
+			int16_t __rte_atomic chunk_remainder;
 		};
 	};
 	uint64_t current_chunk;
@@ -134,7 +134,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	struct rte_reciprocal_u64 fast_div;
 	struct rte_reciprocal_u64 fast_bkt;
 	uint64_t tck_int;
-	uint64_t arm_cnt;
+	uint64_t __rte_atomic arm_cnt;
 	uintptr_t base;
 	uint8_t prod_type_sp;
 	uint8_t enable_stats;
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..db31f91818 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -70,7 +70,7 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);

 	return index;
 }
@@ -124,8 +124,8 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, set_timers,
+					      rte_memory_order_relaxed);

 	return set_timers;
 }
@@ -151,7 +151,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;

 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -193,7 +193,7 @@ cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 		return -ENOENT;

 	bkt = (struct cnxk_tim_bkt *)evtim->impl_opaque[1];
-	sema = __atomic_load_n(&bkt->w1, rte_memory_order_acquire);
+	sema = rte_atomic_load_explicit(&bkt->w1, rte_memory_order_acquire);
 	if (cnxk_tim_bkt_get_hbt(sema) || !cnxk_tim_bkt_get_nent(sema))
 		return -ENOENT;

diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..e52eadbc08 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -23,19 +23,19 @@ cnxk_tim_bkt_fetch_rem(uint64_t w1)
 static inline int16_t
 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&bktp->chunk_remainder, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline uint8_t
@@ -56,20 +56,20 @@ cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
 	/* Clear everything except lock. */
 	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-				  __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+					     rte_memory_order_acquire);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA, rte_memory_order_relaxed);
 }

 static inline uint64_t
@@ -77,19 +77,19 @@ cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
 {
 	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_release);
 }

 static inline void
 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_relaxed);
 }

 static inline uint32_t
@@ -102,19 +102,19 @@ cnxk_tim_bkt_get_nent(uint64_t w1)
 static inline void
 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_release);
 }

 static inline uint64_t
@@ -123,7 +123,7 @@ cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 	const uint64_t v =
 		~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL) & v;
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel) & v;
 }

 static inline uint64_t
@@ -273,8 +273,8 @@ cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -356,8 +356,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -385,8 +385,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 			     : [crem] "r"(&bkt->w1)
 			     : "memory");
 #else
-		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
-		       0)
+		while (rte_atomic_load_explicit((int64_t __rte_atomic *)&bkt->w1,
+						rte_memory_order_relaxed) < 0)
 			;
 #endif
 		goto __retry;
@@ -408,15 +408,14 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 		*chunk = *pent;
 		if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
 			do {
-				lock_sema = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				lock_sema = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
 		}
-		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+		rte_atomic_thread_fence(rte_memory_order_acquire);
 		mirr_bkt->current_chunk = (uintptr_t)chunk;
-		__atomic_store_n(&bkt->chunk_remainder,
-				 tim_ring->nb_chunk_slots - 1,
-				 __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1,
+					  rte_memory_order_release);
 	} else {
 		chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
 		chunk += tim_ring->nb_chunk_slots - rem;
@@ -489,8 +488,8 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -521,7 +520,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 			     : [lock] "r"(&bkt->lock)
 			     : "memory");
 #else
-		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
+		while (rte_atomic_load_explicit(&bkt->lock, rte_memory_order_relaxed))
 			;
 #endif
 		goto __retry;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 0e0d728ba4..3592344e04 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -33,7 +33,8 @@ cnxk_sso_hws_swtag_desched(uint32_t tag, uint8_t new_tt, uint16_t grp,
 	uint64_t val;

 	val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
-	__atomic_store_n((uint64_t *)swtag_desched_op, val, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit((uint64_t __rte_atomic *)swtag_desched_op, val,
+				  rte_memory_order_release);
 }

 static __rte_always_inline void
diff --git a/drivers/net/cnxk/cn9k_ethdev.h b/drivers/net/cnxk/cn9k_ethdev.h
index 4933954c33..c0e649655d 100644
--- a/drivers/net/cnxk/cn9k_ethdev.h
+++ b/drivers/net/cnxk/cn9k_ethdev.h
@@ -11,7 +11,7 @@
 struct cn9k_eth_txq {
 	uint64_t send_hdr_w0;
 	int64_t fc_cache_pkts;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	void *lmt_addr;
 	rte_iova_t io_addr;
 	uint64_t lso_tun_fmt;
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 02/22] common/cnxk: implement SSO HW info
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
                           ` (20 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  6 ++-
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 34 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };
 
+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}
 
-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;
 
 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }
 
+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }
 
+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;
 
-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }
 
@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}
 
-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}
 
 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}
 
+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"
 
 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256
 
 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
 
+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 877333b80c..de748ac409 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -516,6 +516,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@
 
 #include <ethdev_driver.h>
 
-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>
 
-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4a2c88c8c6..c7af0fac11 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
 
 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index b8395aa314..4f0eab8acb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 06ad7437d5..80077ec8a1 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -81,7 +83,7 @@ static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
 	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
-	       ROC_SSO_XAE_PER_XAQ;
+		ws->xae_waes;
 }
 
 static inline void
@@ -394,7 +396,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;
 
 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;
 
 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 982bbb6a9b..904a9b022d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@
 
 #include "cnxk_eventdev_dp.h"
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"
 
 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a4615c1356..311de3d92b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 74a6da5070..27a4dfb490 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@
 
 #include <math.h>
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index db31f91818..5e96f6f188 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"
 
diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 struct pwords {
 	uint64_t u[5];
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 03/22] event/cnxk: add CN20K specific device probe
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 02/22] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 04/22] event/cnxk: add CN20K device config pbhagavatula
                           ` (19 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  4 ++
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 124 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 5461798970..680d7a0199 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -231,6 +231,10 @@ New Features
 
   * Added independent enqueue feature.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
+
 * **Added IPv4 network order lookup in the FIB library.**
 
   A new flag field is introduced in ``rte_fib_conf`` structure.
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 6757af74bf..21cd5c5ae6 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -229,6 +229,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 04/22] event/cnxk: add CN20K device config
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 02/22] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
                           ` (18 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 05/22] event/cnxk: add CN20k event queue configuration
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (2 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 04/22] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
                           ` (17 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index c7af0fac11..49805dd91d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1251,7 +1251,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 904a9b022d..ba08fa2173 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 06/22] event/cnxk: add CN20K event port configuration
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (3 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                           ` (16 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  63 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 174 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  26 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 276 insertions(+), 54 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 49805dd91d..43bc6c0bac 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -99,7 +77,7 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -220,12 +198,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -504,18 +482,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -598,13 +565,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..611906a4f0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,87 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +140,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +242,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..5b6c558d5a
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	uint16_t xae_waes;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ba08fa2173..4066497e6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 311de3d92b..7a3262bcff 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 07/22] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (4 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                           ` (15 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 384 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 611906a4f0..a5dd03de6e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -108,6 +109,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -265,8 +281,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..c7de493681
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
+	       ws->xae_waes;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
+
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 21cd5c5ae6..d0dc2320e1 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -232,6 +232,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 08/22] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (5 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
                           ` (14 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a5dd03de6e..d1668a00c1 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -114,11 +114,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 09/22] event/cnxk: add CN20K event port quiesce
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (6 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
                           ` (13 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d1668a00c1..56e3eb87fb 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -208,6 +208,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -265,6 +324,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 10/22] event/cnxk: add CN20K event port profile switch
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (7 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
                           ` (12 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 56e3eb87fb..53b0b43199 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -124,6 +124,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2dcde0b444..2c723523d2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -383,6 +383,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 11/22] event/cnxk: add CN20K event port preschedule
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (8 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 12/22] event/cnxk: add CN20K device start pbhagavatula
                           ` (11 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 53b0b43199..a788eeed63 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -125,6 +125,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2c723523d2..ebfe863bc5 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -394,6 +394,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 12/22] event/cnxk: add CN20K device start
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (9 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
                           ` (10 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 43bc6c0bac..f2e591f547 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -154,83 +154,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -640,24 +563,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -669,9 +574,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -692,8 +596,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a788eeed63..69c593ed60 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -87,6 +87,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -315,6 +370,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -333,6 +407,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 13/22] event/cnxk: add CN20K device stop and close
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (10 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 12/22] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                           ` (9 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 69c593ed60..6195b29705 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -389,6 +389,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -409,6 +428,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 14/22] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (11 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
                           ` (8 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index b03a62fe70..e97754bd47 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6195b29705..793098bd61 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -408,6 +408,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -427,9 +433,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7a3262bcff..8f3d0982e9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 15/22] event/cnxk: support CN20K Rx adapter
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (12 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                           ` (7 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 793098bd61..602fbd6359 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -414,6 +415,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -433,6 +545,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -509,4 +627,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 5b6c558d5a..7a6363a89e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
 	int64_t __rte_atomic *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 16/22] event/cnxk: support CN20K Rx adapter fast path
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (13 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
                           ` (6 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 602fbd6359..408014036a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -165,21 +168,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -286,7 +392,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -312,7 +419,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index ebfe863bc5..53daf3b4b0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -423,57 +423,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index d0dc2320e1..a2bafab268 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -234,6 +234,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 17/22] event/cnxk: support CN20K Tx adapter
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (14 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                           ` (5 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 408014036a..509c6ea630 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -168,6 +169,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -634,6 +664,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -659,6 +778,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 7a6363a89e..8ea2878fa5 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -25,6 +25,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t grp_base;
 	uint16_t xae_waes;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 18/22] event/cnxk: support CN20K Tx adapter fast path
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (15 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 19/22] common/cnxk: add SSO event aggregator pbhagavatula
                           ` (4 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 +++
 drivers/event/cnxk/cn20k_tx_worker.h         | 176 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 561 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 509c6ea630..5d49a5e5c6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -253,6 +256,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -285,6 +301,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -299,6 +321,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..c8ab560b0e 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,180 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index a2bafab268..8aaf8116f7 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -272,9 +272,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 19/22] common/cnxk: add SSO event aggregator
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (16 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
                           ` (3 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index de748ac409..14ee6031e2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -500,6 +500,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 20/22] event/cnxk: add Rx/Tx event vector support
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (17 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 19/22] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 21/22] common/cnxk: update timer base code pbhagavatula
                           ` (2 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 5d49a5e5c6..57e15b6d8c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -75,6 +75,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
 	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -595,7 +596,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -641,6 +643,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -657,7 +809,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -690,7 +842,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -704,7 +878,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -807,6 +982,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index c8ab560b0e..b09d845b09 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -139,10 +139,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -150,6 +198,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 4066497e6b..33b3538753 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 21/22] common/cnxk: update timer base code
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (18 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25  8:13         ` [PATCH v5 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 14ee6031e2..e7381ae8b2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -528,6 +528,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index f4c61dfb44..c5b3d67eb8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v5 22/22] event/cnxk: add CN20K timer adapter
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (19 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 21/22] common/cnxk: update timer base code pbhagavatula
@ 2024-10-25  8:13         ` pbhagavatula
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25  8:13 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  82 +++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 201 +++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 57e15b6d8c..d68700fc05 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -957,6 +957,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -991,6 +998,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1068,4 +1077,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 27a4dfb490..994d1d1090 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index c5b3d67eb8..114a89ee5a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5e96f6f188..42d376d375 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -77,6 +68,24 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
+
+	return index;
+}
+
 #define FP(_name, _f3, _f2, _f1, _flags)                                       \
 	uint16_t __rte_noinline cnxk_tim_arm_burst_##_name(                    \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -132,6 +141,29 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -174,6 +206,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = (uint64_t __rte_atomic *)&tim[i]->impl_opaque[1];
+		if (!rte_atomic_compare_exchange_strong_explicit(status, (uint64_t *)&tim[i], 0,
+								 rte_memory_order_release,
+								 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index e52eadbc08..be6744db51 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -573,6 +580,200 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t __rte_atomic *status;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = (uint64_t __rte_atomic *)&tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = (uint64_t __rte_atomic *)&tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic
+							      *)&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = (uint64_t __rte_atomic *)&tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic *)&tim[cnt + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 01/22] event/cnxk: use stdatomic API
  2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
                           ` (20 preceding siblings ...)
  2024-10-25  8:13         ` [PATCH v5 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-25 12:29         ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 02/22] common/cnxk: implement SSO HW info pbhagavatula
                             ` (21 more replies)
  21 siblings, 22 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v2 Changes:
- Rebase and remove single dequeue and enqueue functions.
v3 Changes:
- Remove __atomic builtins.
v4 Changes:
- Rebase onto next-event tree.
v5 Changes:
- Rebase, shuffle release notes order.
v6 Changes:
- Remove unnecessary net/cnxk changes.

 drivers/event/cnxk/cn10k_eventdev.c         |  6 +--
 drivers/event/cnxk/cn10k_eventdev.h         |  4 +-
 drivers/event/cnxk/cn10k_tx_worker.h        |  7 ++-
 drivers/event/cnxk/cn10k_worker.c           | 15 +++---
 drivers/event/cnxk/cn10k_worker.h           |  2 +-
 drivers/event/cnxk/cn9k_eventdev.c          |  8 +--
 drivers/event/cnxk/cn9k_worker.h            | 19 ++++---
 drivers/event/cnxk/cnxk_eventdev.h          |  4 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c | 60 ++++++++++-----------
 drivers/event/cnxk/cnxk_tim_evdev.c         |  4 +-
 drivers/event/cnxk/cnxk_tim_evdev.h         | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.c        | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.h        | 57 ++++++++++----------
 drivers/event/cnxk/cnxk_worker.h            |  3 +-
 14 files changed, 108 insertions(+), 101 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4edac33a84..4a2c88c8c6 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -94,9 +94,9 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	uint64_t val;

 	ws->grp_base = grp_base;
-	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 	ws->xaq_lmt = dev->xaq_lmt;
-	ws->fc_cache_space = dev->fc_cache_space;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);

@@ -768,7 +768,7 @@ cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 	for (i = 0; i < dev->nb_event_ports; i++) {
 		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
 		ws->xaq_lmt = dev->xaq_lmt;
-		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 		ws->tstamp = dev->tstamp;
 		if (lookup_mem)
 			ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..b8395aa314 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -19,8 +19,8 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	uint64_t meta_aura;
 	/* Add Work Fastpath data */
-	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
-	int64_t *fc_cache_space;
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 0695ea23e1..19cb2e22e5 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -51,7 +51,9 @@ cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -60,7 +62,8 @@ static __rte_always_inline int32_t
 cn10k_sso_sq_depth(const struct cn10k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index c49138316c..06ad7437d5 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -16,7 +16,7 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -80,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
 	       ROC_SSO_XAE_PER_XAQ;
 }

@@ -90,19 +90,20 @@ sso_lmt_aw_wait_fc(struct cn10k_sso_hws *ws, int64_t req)
 	int64_t cached, refill;

 retry:
-	while (__atomic_load_n(ws->fc_cache_space, __ATOMIC_RELAXED) < 0)
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
 		;

-	cached = __atomic_fetch_sub(ws->fc_cache_space, req, __ATOMIC_ACQUIRE) - req;
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
 	/* Check if there is enough space, else update and retry. */
 	if (cached < 0) {
 		/* Check if we have space else retry. */
 		do {
 			refill = sso_read_xaq_space(ws);
 		} while (refill <= 0);
-		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill,
-					  0, __ATOMIC_RELEASE,
-					  __ATOMIC_RELAXED);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
 		goto retry;
 	}
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 5d3394508e..954dee5a2a 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -311,7 +311,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		roc_load_pair(gw.u64[0], gw.u64[1],
 			      ws->base + SSOW_LF_GWS_WQE0);
 	} while (gw.u64[0] & BIT_ULL(63));
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 #endif
 	ws->gw_rdata = gw.u64[0];
 	if (gw.u64[1])
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index b176044aa5..05e237c005 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -74,7 +74,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	if (dev->dual_ws) {
 		dws = hws;
 		dws->grp_base = grp_base;
-		dws->fc_mem = (uint64_t *)dev->fc_iova;
+		dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		dws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, dws->base[0] + SSOW_LF_GWS_NW_TIM);
@@ -82,7 +82,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	} else {
 		ws = hws;
 		ws->grp_base = grp_base;
-		ws->fc_mem = (uint64_t *)dev->fc_iova;
+		ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		ws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
@@ -822,14 +822,14 @@ cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 			struct cn9k_sso_hws_dual *dws =
 				event_dev->data->ports[i];
 			dws->xaq_lmt = dev->xaq_lmt;
-			dws->fc_mem = (uint64_t *)dev->fc_iova;
+			dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			dws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				dws->lookup_mem = lookup_mem;
 		} else {
 			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
 			ws->xaq_lmt = dev->xaq_lmt;
-			ws->fc_mem = (uint64_t *)dev->fc_iova;
+			ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			ws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 064cdfe94a..f07b8a9bff 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -28,7 +28,7 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -71,7 +71,7 @@ cn9k_sso_hws_new_event_wait(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (ws->xaq_lmt <= __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED))
+	while (ws->xaq_lmt <= rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -93,7 +93,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(ws->base);
 		cn9k_sso_hws_new_event_wait(ws, ev);
 	}
@@ -110,7 +110,7 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (dws->xaq_lmt <= *dws->fc_mem)
 		return 0;

@@ -128,7 +128,7 @@ cn9k_sso_hws_dual_new_event_wait(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (dws->xaq_lmt <= __atomic_load_n(dws->fc_mem, __ATOMIC_RELAXED))
+	while (dws->xaq_lmt <= rte_atomic_load_explicit(dws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -151,7 +151,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(base);
 		cn9k_sso_hws_dual_new_event_wait(dws, ev);
 	}
@@ -571,7 +571,9 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -740,7 +742,8 @@ static __rte_always_inline int32_t
 cn9k_sso_sq_depth(const struct cn9k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..982bbb6a9b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,7 +136,7 @@ struct __rte_cache_aligned cn9k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
@@ -154,7 +154,7 @@ struct __rte_cache_aligned cn9k_sso_hws_dual {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a4615c1356 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -63,7 +63,7 @@ seqn_list_update(int val)
 		return -1;

 	seqn_list[seqn_list_index++] = val;
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	return 0;
 }

@@ -82,7 +82,7 @@ seqn_list_check(int limit)
 }

 struct test_core_param {
-	uint32_t *total_events;
+	uint32_t __rte_atomic *total_events;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port;
 	uint8_t sched_type;
@@ -540,13 +540,13 @@ static int
 worker_multi_port_fn(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;
 	int ret;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;
@@ -554,30 +554,30 @@ worker_multi_port_fn(void *arg)
 		ret = validate_event(&ev);
 		RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event");
 		rte_pktmbuf_free(ev.mbuf);
-		__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 	}

 	return 0;
 }

 static inline int
-wait_workers_to_join(const uint32_t *count)
+wait_workers_to_join(const uint32_t __rte_atomic *count)
 {
 	uint64_t cycles, print_cycles;

 	cycles = rte_get_timer_cycles();
 	print_cycles = cycles;
-	while (__atomic_load_n(count, __ATOMIC_RELAXED)) {
+	while (rte_atomic_load_explicit(count, rte_memory_order_relaxed)) {
 		uint64_t new_cycles = rte_get_timer_cycles();

 		if (new_cycles - print_cycles > rte_get_timer_hz()) {
 			plt_info("Events %d",
-				 __atomic_load_n(count, __ATOMIC_RELAXED));
+				 rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			print_cycles = new_cycles;
 		}
 		if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) {
 			plt_err("No schedules for seconds, deadlock (%d)",
-				__atomic_load_n(count, __ATOMIC_RELAXED));
+				rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			rte_event_dev_dump(evdev, stdout);
 			cycles = new_cycles;
 			return -1;
@@ -593,7 +593,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 			int (*worker_thread)(void *), uint32_t total_events,
 			uint8_t nb_workers, uint8_t sched_type)
 {
-	uint32_t atomic_total_events;
+	uint32_t __rte_atomic atomic_total_events;
 	struct test_core_param *param;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port = 0;
@@ -603,7 +603,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 	if (!nb_workers)
 		return 0;

-	__atomic_store_n(&atomic_total_events, total_events, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&atomic_total_events, total_events, rte_memory_order_relaxed);
 	seqn_list_init();

 	param = malloc(sizeof(struct test_core_param) * nb_workers);
@@ -640,7 +640,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		param[port].sched_type = sched_type;
 		param[port].port = port;
 		param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
 		if (w_lcore == RTE_MAX_LCORE) {
 			plt_err("Failed to get next available lcore");
@@ -651,7 +651,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
 	}

-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	ret = wait_workers_to_join(&atomic_total_events);
 	free(param);

@@ -890,13 +890,13 @@ worker_flow_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -916,8 +916,8 @@ worker_flow_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1046,13 +1046,13 @@ worker_group_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -1072,8 +1072,8 @@ worker_group_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1205,19 +1205,19 @@ static int
 worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.sub_event_type == MAX_STAGES) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.sub_event_type++;
@@ -1284,16 +1284,16 @@ worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
@@ -1329,16 +1329,16 @@ worker_mixed_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* Last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..74a6da5070 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -358,7 +358,7 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
 		tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;

 	stats->evtim_exp_count =
-		__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
+		rte_atomic_load_explicit(&tim_ring->arm_cnt, rte_memory_order_relaxed);
 	stats->ev_enq_count = stats->evtim_exp_count;
 	stats->adapter_tick_count =
 		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div);
@@ -370,7 +370,7 @@ cnxk_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
 {
 	struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;

-	__atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tim_ring->arm_cnt, 0, rte_memory_order_relaxed);
 	return 0;
 }

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..f4c61dfb44 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -108,15 +108,15 @@ struct cnxk_tim_evdev {
 struct cnxk_tim_bkt {
 	uint64_t first_chunk;
 	union {
-		uint64_t w1;
+		uint64_t __rte_atomic w1;
 		struct {
-			uint32_t nb_entry;
+			uint32_t __rte_atomic nb_entry;
 			uint8_t sbt : 1;
 			uint8_t hbt : 1;
 			uint8_t bsk : 1;
 			uint8_t rsvd : 5;
-			uint8_t lock;
-			int16_t chunk_remainder;
+			uint8_t __rte_atomic lock;
+			int16_t __rte_atomic chunk_remainder;
 		};
 	};
 	uint64_t current_chunk;
@@ -134,7 +134,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	struct rte_reciprocal_u64 fast_div;
 	struct rte_reciprocal_u64 fast_bkt;
 	uint64_t tck_int;
-	uint64_t arm_cnt;
+	uint64_t __rte_atomic arm_cnt;
 	uintptr_t base;
 	uint8_t prod_type_sp;
 	uint8_t enable_stats;
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..db31f91818 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -70,7 +70,7 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);

 	return index;
 }
@@ -124,8 +124,8 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, set_timers,
+					      rte_memory_order_relaxed);

 	return set_timers;
 }
@@ -151,7 +151,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;

 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -193,7 +193,7 @@ cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 		return -ENOENT;

 	bkt = (struct cnxk_tim_bkt *)evtim->impl_opaque[1];
-	sema = __atomic_load_n(&bkt->w1, rte_memory_order_acquire);
+	sema = rte_atomic_load_explicit(&bkt->w1, rte_memory_order_acquire);
 	if (cnxk_tim_bkt_get_hbt(sema) || !cnxk_tim_bkt_get_nent(sema))
 		return -ENOENT;

diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..e52eadbc08 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -23,19 +23,19 @@ cnxk_tim_bkt_fetch_rem(uint64_t w1)
 static inline int16_t
 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&bktp->chunk_remainder, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline uint8_t
@@ -56,20 +56,20 @@ cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
 	/* Clear everything except lock. */
 	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-				  __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+					     rte_memory_order_acquire);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA, rte_memory_order_relaxed);
 }

 static inline uint64_t
@@ -77,19 +77,19 @@ cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
 {
 	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_release);
 }

 static inline void
 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_relaxed);
 }

 static inline uint32_t
@@ -102,19 +102,19 @@ cnxk_tim_bkt_get_nent(uint64_t w1)
 static inline void
 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_release);
 }

 static inline uint64_t
@@ -123,7 +123,7 @@ cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 	const uint64_t v =
 		~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL) & v;
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel) & v;
 }

 static inline uint64_t
@@ -273,8 +273,8 @@ cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -356,8 +356,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -385,8 +385,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 			     : [crem] "r"(&bkt->w1)
 			     : "memory");
 #else
-		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
-		       0)
+		while (rte_atomic_load_explicit((int64_t __rte_atomic *)&bkt->w1,
+						rte_memory_order_relaxed) < 0)
 			;
 #endif
 		goto __retry;
@@ -408,15 +408,14 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 		*chunk = *pent;
 		if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
 			do {
-				lock_sema = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				lock_sema = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
 		}
-		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+		rte_atomic_thread_fence(rte_memory_order_acquire);
 		mirr_bkt->current_chunk = (uintptr_t)chunk;
-		__atomic_store_n(&bkt->chunk_remainder,
-				 tim_ring->nb_chunk_slots - 1,
-				 __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1,
+					  rte_memory_order_release);
 	} else {
 		chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
 		chunk += tim_ring->nb_chunk_slots - rem;
@@ -489,8 +488,8 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -521,7 +520,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 			     : [lock] "r"(&bkt->lock)
 			     : "memory");
 #else
-		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
+		while (rte_atomic_load_explicit(&bkt->lock, rte_memory_order_relaxed))
 			;
 #endif
 		goto __retry;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 0e0d728ba4..3592344e04 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -33,7 +33,8 @@ cnxk_sso_hws_swtag_desched(uint32_t tag, uint8_t new_tt, uint16_t grp,
 	uint64_t val;

 	val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
-	__atomic_store_n((uint64_t *)swtag_desched_op, val, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit((uint64_t __rte_atomic *)swtag_desched_op, val,
+				  rte_memory_order_release);
 }

 static __rte_always_inline void
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 02/22] common/cnxk: implement SSO HW info
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
                             ` (20 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  6 ++-
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 34 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };
 
+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}
 
-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;
 
 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }
 
+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }
 
+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;
 
-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }
 
@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}
 
-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}
 
 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}
 
+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"
 
 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256
 
 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
 
+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 877333b80c..de748ac409 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -516,6 +516,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@
 
 #include <ethdev_driver.h>
 
-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>
 
-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4a2c88c8c6..c7af0fac11 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
 
 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index b8395aa314..4f0eab8acb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 06ad7437d5..80077ec8a1 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -81,7 +83,7 @@ static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
 	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
-	       ROC_SSO_XAE_PER_XAQ;
+		ws->xae_waes;
 }
 
 static inline void
@@ -394,7 +396,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;
 
 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;
 
 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 982bbb6a9b..904a9b022d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@
 
 #include "cnxk_eventdev_dp.h"
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"
 
 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a4615c1356..311de3d92b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 74a6da5070..27a4dfb490 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@
 
 #include <math.h>
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index db31f91818..5e96f6f188 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"
 
diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 struct pwords {
 	uint64_t u[5];
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 03/22] event/cnxk: add CN20K specific device probe
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 02/22] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 04/22] event/cnxk: add CN20K device config pbhagavatula
                             ` (19 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  4 ++
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 124 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 5461798970..680d7a0199 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -231,6 +231,10 @@ New Features
 
   * Added independent enqueue feature.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
+
 * **Added IPv4 network order lookup in the FIB library.**
 
   A new flag field is introduced in ``rte_fib_conf`` structure.
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 6757af74bf..21cd5c5ae6 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -229,6 +229,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 04/22] event/cnxk: add CN20K device config
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 02/22] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
                             ` (18 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 05/22] event/cnxk: add CN20k event queue configuration
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (2 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 04/22] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
                             ` (17 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index c7af0fac11..49805dd91d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1251,7 +1251,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 904a9b022d..ba08fa2173 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 06/22] event/cnxk: add CN20K event port configuration
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (3 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                             ` (16 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  63 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 174 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  26 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 276 insertions(+), 54 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 49805dd91d..43bc6c0bac 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -99,7 +77,7 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -220,12 +198,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -504,18 +482,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -598,13 +565,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..611906a4f0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,87 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +140,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +242,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..5b6c558d5a
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	uint16_t xae_waes;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ba08fa2173..4066497e6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 311de3d92b..7a3262bcff 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 07/22] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (4 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                             ` (15 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 384 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 611906a4f0..a5dd03de6e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -108,6 +109,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -265,8 +281,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..c7de493681
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
+	       ws->xae_waes;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
+
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 21cd5c5ae6..d0dc2320e1 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -232,6 +232,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 08/22] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (5 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
                             ` (14 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a5dd03de6e..d1668a00c1 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -114,11 +114,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 09/22] event/cnxk: add CN20K event port quiesce
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (6 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
                             ` (13 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d1668a00c1..56e3eb87fb 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -208,6 +208,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -265,6 +324,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 10/22] event/cnxk: add CN20K event port profile switch
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (7 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
                             ` (12 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 56e3eb87fb..53b0b43199 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -124,6 +124,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2dcde0b444..2c723523d2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -383,6 +383,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 11/22] event/cnxk: add CN20K event port preschedule
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (8 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 12/22] event/cnxk: add CN20K device start pbhagavatula
                             ` (11 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 53b0b43199..a788eeed63 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -125,6 +125,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2c723523d2..ebfe863bc5 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -394,6 +394,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 12/22] event/cnxk: add CN20K device start
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (9 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
                             ` (10 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 43bc6c0bac..f2e591f547 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -154,83 +154,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -640,24 +563,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -669,9 +574,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -692,8 +596,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a788eeed63..69c593ed60 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -87,6 +87,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -315,6 +370,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -333,6 +407,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 13/22] event/cnxk: add CN20K device stop and close
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (10 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 12/22] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                             ` (9 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 69c593ed60..6195b29705 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -389,6 +389,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -409,6 +428,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 14/22] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (11 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
                             ` (8 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index b03a62fe70..e97754bd47 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6195b29705..793098bd61 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -408,6 +408,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -427,9 +433,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7a3262bcff..8f3d0982e9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 15/22] event/cnxk: support CN20K Rx adapter
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (12 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                             ` (7 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 793098bd61..602fbd6359 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -414,6 +415,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -433,6 +545,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -509,4 +627,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 5b6c558d5a..7a6363a89e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
 	int64_t __rte_atomic *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 16/22] event/cnxk: support CN20K Rx adapter fast path
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (13 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
                             ` (6 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 602fbd6359..408014036a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -165,21 +168,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -286,7 +392,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -312,7 +419,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index ebfe863bc5..53daf3b4b0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -423,57 +423,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index d0dc2320e1..a2bafab268 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -234,6 +234,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 17/22] event/cnxk: support CN20K Tx adapter
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (14 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                             ` (5 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 408014036a..509c6ea630 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -168,6 +169,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -634,6 +664,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -659,6 +778,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 7a6363a89e..8ea2878fa5 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -25,6 +25,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t grp_base;
 	uint16_t xae_waes;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 18/22] event/cnxk: support CN20K Tx adapter fast path
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (15 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 19/22] common/cnxk: add SSO event aggregator pbhagavatula
                             ` (4 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 +++
 drivers/event/cnxk/cn20k_tx_worker.h         | 176 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 561 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 509c6ea630..5d49a5e5c6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -253,6 +256,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -285,6 +301,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -299,6 +321,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..c8ab560b0e 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,180 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index a2bafab268..8aaf8116f7 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -272,9 +272,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 19/22] common/cnxk: add SSO event aggregator
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (16 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
                             ` (3 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index de748ac409..14ee6031e2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -500,6 +500,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 20/22] event/cnxk: add Rx/Tx event vector support
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (17 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 19/22] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 21/22] common/cnxk: update timer base code pbhagavatula
                             ` (2 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 5d49a5e5c6..57e15b6d8c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -75,6 +75,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
 	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -595,7 +596,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -641,6 +643,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -657,7 +809,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -690,7 +842,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -704,7 +878,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -807,6 +982,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index c8ab560b0e..b09d845b09 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -139,10 +139,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -150,6 +198,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 4066497e6b..33b3538753 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 21/22] common/cnxk: update timer base code
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (18 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 12:29           ` [PATCH v6 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 14ee6031e2..e7381ae8b2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -528,6 +528,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index f4c61dfb44..c5b3d67eb8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v6 22/22] event/cnxk: add CN20K timer adapter
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (19 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 21/22] common/cnxk: update timer base code pbhagavatula
@ 2024-10-25 12:29           ` pbhagavatula
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 12:29 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  82 +++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 201 +++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 57e15b6d8c..d68700fc05 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -957,6 +957,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -991,6 +998,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1068,4 +1077,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 27a4dfb490..994d1d1090 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index c5b3d67eb8..114a89ee5a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5e96f6f188..42d376d375 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -77,6 +68,24 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
+
+	return index;
+}
+
 #define FP(_name, _f3, _f2, _f1, _flags)                                       \
 	uint16_t __rte_noinline cnxk_tim_arm_burst_##_name(                    \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -132,6 +141,29 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -174,6 +206,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = (uint64_t __rte_atomic *)&tim[i]->impl_opaque[1];
+		if (!rte_atomic_compare_exchange_strong_explicit(status, (uint64_t *)&tim[i], 0,
+								 rte_memory_order_release,
+								 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index e52eadbc08..be6744db51 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -573,6 +580,200 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t __rte_atomic *status;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = (uint64_t __rte_atomic *)&tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = (uint64_t __rte_atomic *)&tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic
+							      *)&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = (uint64_t __rte_atomic *)&tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic *)&tim[cnt + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 01/22] event/cnxk: use stdatomic API
  2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
                             ` (20 preceding siblings ...)
  2024-10-25 12:29           ` [PATCH v6 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-25 13:03           ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 02/22] common/cnxk: implement SSO HW info pbhagavatula
                               ` (21 more replies)
  21 siblings, 22 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: series-33602 ("event/cnxk: fix getwork write data on reconfig")

v2 Changes:
- Rebase and remove single dequeue and enqueue functions.
v3 Changes:
- Remove __atomic builtins.
v4 Changes:
- Rebase onto next-event tree.
v5 Changes:
- Rebase, shuffle release notes order.
v6 Changes:
- Remove unnecessary net/cnxk changes.
v7 Changes:
- Add depends on tag to make CI run.

 drivers/event/cnxk/cn10k_eventdev.c         |  6 +--
 drivers/event/cnxk/cn10k_eventdev.h         |  4 +-
 drivers/event/cnxk/cn10k_tx_worker.h        |  7 ++-
 drivers/event/cnxk/cn10k_worker.c           | 15 +++---
 drivers/event/cnxk/cn10k_worker.h           |  2 +-
 drivers/event/cnxk/cn9k_eventdev.c          |  8 +--
 drivers/event/cnxk/cn9k_worker.h            | 19 ++++---
 drivers/event/cnxk/cnxk_eventdev.h          |  4 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c | 60 ++++++++++-----------
 drivers/event/cnxk/cnxk_tim_evdev.c         |  4 +-
 drivers/event/cnxk/cnxk_tim_evdev.h         | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.c        | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.h        | 57 ++++++++++----------
 drivers/event/cnxk/cnxk_worker.h            |  3 +-
 14 files changed, 108 insertions(+), 101 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4edac33a84..4a2c88c8c6 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -94,9 +94,9 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	uint64_t val;

 	ws->grp_base = grp_base;
-	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 	ws->xaq_lmt = dev->xaq_lmt;
-	ws->fc_cache_space = dev->fc_cache_space;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);

@@ -768,7 +768,7 @@ cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 	for (i = 0; i < dev->nb_event_ports; i++) {
 		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
 		ws->xaq_lmt = dev->xaq_lmt;
-		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 		ws->tstamp = dev->tstamp;
 		if (lookup_mem)
 			ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..b8395aa314 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -19,8 +19,8 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	uint64_t meta_aura;
 	/* Add Work Fastpath data */
-	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
-	int64_t *fc_cache_space;
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 0695ea23e1..19cb2e22e5 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -51,7 +51,9 @@ cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -60,7 +62,8 @@ static __rte_always_inline int32_t
 cn10k_sso_sq_depth(const struct cn10k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index c49138316c..06ad7437d5 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -16,7 +16,7 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -80,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
 	       ROC_SSO_XAE_PER_XAQ;
 }

@@ -90,19 +90,20 @@ sso_lmt_aw_wait_fc(struct cn10k_sso_hws *ws, int64_t req)
 	int64_t cached, refill;

 retry:
-	while (__atomic_load_n(ws->fc_cache_space, __ATOMIC_RELAXED) < 0)
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
 		;

-	cached = __atomic_fetch_sub(ws->fc_cache_space, req, __ATOMIC_ACQUIRE) - req;
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
 	/* Check if there is enough space, else update and retry. */
 	if (cached < 0) {
 		/* Check if we have space else retry. */
 		do {
 			refill = sso_read_xaq_space(ws);
 		} while (refill <= 0);
-		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill,
-					  0, __ATOMIC_RELEASE,
-					  __ATOMIC_RELAXED);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
 		goto retry;
 	}
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 5d3394508e..954dee5a2a 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -311,7 +311,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		roc_load_pair(gw.u64[0], gw.u64[1],
 			      ws->base + SSOW_LF_GWS_WQE0);
 	} while (gw.u64[0] & BIT_ULL(63));
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 #endif
 	ws->gw_rdata = gw.u64[0];
 	if (gw.u64[1])
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index b176044aa5..05e237c005 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -74,7 +74,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	if (dev->dual_ws) {
 		dws = hws;
 		dws->grp_base = grp_base;
-		dws->fc_mem = (uint64_t *)dev->fc_iova;
+		dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		dws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, dws->base[0] + SSOW_LF_GWS_NW_TIM);
@@ -82,7 +82,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	} else {
 		ws = hws;
 		ws->grp_base = grp_base;
-		ws->fc_mem = (uint64_t *)dev->fc_iova;
+		ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		ws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
@@ -822,14 +822,14 @@ cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 			struct cn9k_sso_hws_dual *dws =
 				event_dev->data->ports[i];
 			dws->xaq_lmt = dev->xaq_lmt;
-			dws->fc_mem = (uint64_t *)dev->fc_iova;
+			dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			dws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				dws->lookup_mem = lookup_mem;
 		} else {
 			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
 			ws->xaq_lmt = dev->xaq_lmt;
-			ws->fc_mem = (uint64_t *)dev->fc_iova;
+			ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			ws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 064cdfe94a..f07b8a9bff 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -28,7 +28,7 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -71,7 +71,7 @@ cn9k_sso_hws_new_event_wait(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (ws->xaq_lmt <= __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED))
+	while (ws->xaq_lmt <= rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -93,7 +93,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(ws->base);
 		cn9k_sso_hws_new_event_wait(ws, ev);
 	}
@@ -110,7 +110,7 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (dws->xaq_lmt <= *dws->fc_mem)
 		return 0;

@@ -128,7 +128,7 @@ cn9k_sso_hws_dual_new_event_wait(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (dws->xaq_lmt <= __atomic_load_n(dws->fc_mem, __ATOMIC_RELAXED))
+	while (dws->xaq_lmt <= rte_atomic_load_explicit(dws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -151,7 +151,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(base);
 		cn9k_sso_hws_dual_new_event_wait(dws, ev);
 	}
@@ -571,7 +571,9 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -740,7 +742,8 @@ static __rte_always_inline int32_t
 cn9k_sso_sq_depth(const struct cn9k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..982bbb6a9b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,7 +136,7 @@ struct __rte_cache_aligned cn9k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
@@ -154,7 +154,7 @@ struct __rte_cache_aligned cn9k_sso_hws_dual {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a4615c1356 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -63,7 +63,7 @@ seqn_list_update(int val)
 		return -1;

 	seqn_list[seqn_list_index++] = val;
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	return 0;
 }

@@ -82,7 +82,7 @@ seqn_list_check(int limit)
 }

 struct test_core_param {
-	uint32_t *total_events;
+	uint32_t __rte_atomic *total_events;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port;
 	uint8_t sched_type;
@@ -540,13 +540,13 @@ static int
 worker_multi_port_fn(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;
 	int ret;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;
@@ -554,30 +554,30 @@ worker_multi_port_fn(void *arg)
 		ret = validate_event(&ev);
 		RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event");
 		rte_pktmbuf_free(ev.mbuf);
-		__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 	}

 	return 0;
 }

 static inline int
-wait_workers_to_join(const uint32_t *count)
+wait_workers_to_join(const uint32_t __rte_atomic *count)
 {
 	uint64_t cycles, print_cycles;

 	cycles = rte_get_timer_cycles();
 	print_cycles = cycles;
-	while (__atomic_load_n(count, __ATOMIC_RELAXED)) {
+	while (rte_atomic_load_explicit(count, rte_memory_order_relaxed)) {
 		uint64_t new_cycles = rte_get_timer_cycles();

 		if (new_cycles - print_cycles > rte_get_timer_hz()) {
 			plt_info("Events %d",
-				 __atomic_load_n(count, __ATOMIC_RELAXED));
+				 rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			print_cycles = new_cycles;
 		}
 		if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) {
 			plt_err("No schedules for seconds, deadlock (%d)",
-				__atomic_load_n(count, __ATOMIC_RELAXED));
+				rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			rte_event_dev_dump(evdev, stdout);
 			cycles = new_cycles;
 			return -1;
@@ -593,7 +593,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 			int (*worker_thread)(void *), uint32_t total_events,
 			uint8_t nb_workers, uint8_t sched_type)
 {
-	uint32_t atomic_total_events;
+	uint32_t __rte_atomic atomic_total_events;
 	struct test_core_param *param;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port = 0;
@@ -603,7 +603,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 	if (!nb_workers)
 		return 0;

-	__atomic_store_n(&atomic_total_events, total_events, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&atomic_total_events, total_events, rte_memory_order_relaxed);
 	seqn_list_init();

 	param = malloc(sizeof(struct test_core_param) * nb_workers);
@@ -640,7 +640,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		param[port].sched_type = sched_type;
 		param[port].port = port;
 		param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
 		if (w_lcore == RTE_MAX_LCORE) {
 			plt_err("Failed to get next available lcore");
@@ -651,7 +651,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
 	}

-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	ret = wait_workers_to_join(&atomic_total_events);
 	free(param);

@@ -890,13 +890,13 @@ worker_flow_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -916,8 +916,8 @@ worker_flow_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1046,13 +1046,13 @@ worker_group_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -1072,8 +1072,8 @@ worker_group_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1205,19 +1205,19 @@ static int
 worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.sub_event_type == MAX_STAGES) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.sub_event_type++;
@@ -1284,16 +1284,16 @@ worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
@@ -1329,16 +1329,16 @@ worker_mixed_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* Last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..74a6da5070 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -358,7 +358,7 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
 		tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;

 	stats->evtim_exp_count =
-		__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
+		rte_atomic_load_explicit(&tim_ring->arm_cnt, rte_memory_order_relaxed);
 	stats->ev_enq_count = stats->evtim_exp_count;
 	stats->adapter_tick_count =
 		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div);
@@ -370,7 +370,7 @@ cnxk_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
 {
 	struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;

-	__atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tim_ring->arm_cnt, 0, rte_memory_order_relaxed);
 	return 0;
 }

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..f4c61dfb44 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -108,15 +108,15 @@ struct cnxk_tim_evdev {
 struct cnxk_tim_bkt {
 	uint64_t first_chunk;
 	union {
-		uint64_t w1;
+		uint64_t __rte_atomic w1;
 		struct {
-			uint32_t nb_entry;
+			uint32_t __rte_atomic nb_entry;
 			uint8_t sbt : 1;
 			uint8_t hbt : 1;
 			uint8_t bsk : 1;
 			uint8_t rsvd : 5;
-			uint8_t lock;
-			int16_t chunk_remainder;
+			uint8_t __rte_atomic lock;
+			int16_t __rte_atomic chunk_remainder;
 		};
 	};
 	uint64_t current_chunk;
@@ -134,7 +134,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	struct rte_reciprocal_u64 fast_div;
 	struct rte_reciprocal_u64 fast_bkt;
 	uint64_t tck_int;
-	uint64_t arm_cnt;
+	uint64_t __rte_atomic arm_cnt;
 	uintptr_t base;
 	uint8_t prod_type_sp;
 	uint8_t enable_stats;
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..db31f91818 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -70,7 +70,7 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);

 	return index;
 }
@@ -124,8 +124,8 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, set_timers,
+					      rte_memory_order_relaxed);

 	return set_timers;
 }
@@ -151,7 +151,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;

 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -193,7 +193,7 @@ cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 		return -ENOENT;

 	bkt = (struct cnxk_tim_bkt *)evtim->impl_opaque[1];
-	sema = __atomic_load_n(&bkt->w1, rte_memory_order_acquire);
+	sema = rte_atomic_load_explicit(&bkt->w1, rte_memory_order_acquire);
 	if (cnxk_tim_bkt_get_hbt(sema) || !cnxk_tim_bkt_get_nent(sema))
 		return -ENOENT;

diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..e52eadbc08 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -23,19 +23,19 @@ cnxk_tim_bkt_fetch_rem(uint64_t w1)
 static inline int16_t
 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&bktp->chunk_remainder, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline uint8_t
@@ -56,20 +56,20 @@ cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
 	/* Clear everything except lock. */
 	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-				  __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+					     rte_memory_order_acquire);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA, rte_memory_order_relaxed);
 }

 static inline uint64_t
@@ -77,19 +77,19 @@ cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
 {
 	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_release);
 }

 static inline void
 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_relaxed);
 }

 static inline uint32_t
@@ -102,19 +102,19 @@ cnxk_tim_bkt_get_nent(uint64_t w1)
 static inline void
 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_release);
 }

 static inline uint64_t
@@ -123,7 +123,7 @@ cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 	const uint64_t v =
 		~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL) & v;
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel) & v;
 }

 static inline uint64_t
@@ -273,8 +273,8 @@ cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -356,8 +356,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -385,8 +385,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 			     : [crem] "r"(&bkt->w1)
 			     : "memory");
 #else
-		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
-		       0)
+		while (rte_atomic_load_explicit((int64_t __rte_atomic *)&bkt->w1,
+						rte_memory_order_relaxed) < 0)
 			;
 #endif
 		goto __retry;
@@ -408,15 +408,14 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 		*chunk = *pent;
 		if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
 			do {
-				lock_sema = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				lock_sema = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
 		}
-		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+		rte_atomic_thread_fence(rte_memory_order_acquire);
 		mirr_bkt->current_chunk = (uintptr_t)chunk;
-		__atomic_store_n(&bkt->chunk_remainder,
-				 tim_ring->nb_chunk_slots - 1,
-				 __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1,
+					  rte_memory_order_release);
 	} else {
 		chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
 		chunk += tim_ring->nb_chunk_slots - rem;
@@ -489,8 +488,8 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -521,7 +520,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 			     : [lock] "r"(&bkt->lock)
 			     : "memory");
 #else
-		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
+		while (rte_atomic_load_explicit(&bkt->lock, rte_memory_order_relaxed))
 			;
 #endif
 		goto __retry;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 0e0d728ba4..3592344e04 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -33,7 +33,8 @@ cnxk_sso_hws_swtag_desched(uint32_t tag, uint8_t new_tt, uint16_t grp,
 	uint64_t val;

 	val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
-	__atomic_store_n((uint64_t *)swtag_desched_op, val, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit((uint64_t __rte_atomic *)swtag_desched_op, val,
+				  rte_memory_order_release);
 }

 static __rte_always_inline void
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 02/22] common/cnxk: implement SSO HW info
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
                               ` (20 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Ankur Dwivedi, Anoob Joseph, Tejasree Kondoj, Pavan Nikhilesh,
	Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  6 ++-
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 34 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index dd65946e9e..63139b5517 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2119,6 +2120,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };
 
+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}
 
-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;
 
 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }
 
+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }
 
+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;
 
-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }
 
@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}
 
-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}
 
 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}
 
+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"
 
 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256
 
 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
 
+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 877333b80c..de748ac409 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -516,6 +516,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@
 
 #include <ethdev_driver.h>
 
-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>
 
-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4a2c88c8c6..c7af0fac11 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
 
 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index b8395aa314..4f0eab8acb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 06ad7437d5..80077ec8a1 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -81,7 +83,7 @@ static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
 	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
-	       ROC_SSO_XAE_PER_XAQ;
+		ws->xae_waes;
 }
 
 static inline void
@@ -394,7 +396,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;
 
 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;
 
 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 982bbb6a9b..904a9b022d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@
 
 #include "cnxk_eventdev_dp.h"
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"
 
 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a4615c1356..311de3d92b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 74a6da5070..27a4dfb490 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@
 
 #include <math.h>
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index db31f91818..5e96f6f188 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"
 
diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 struct pwords {
 	uint64_t u[5];
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 03/22] event/cnxk: add CN20K specific device probe
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 02/22] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 04/22] event/cnxk: add CN20K device config pbhagavatula
                               ` (19 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Nithin Dabilpuram, Kiran Kumar K,
	Sunil Kumar Kori, Satha Rao, Harman Kalra, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  4 ++
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 124 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 5461798970..680d7a0199 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -231,6 +231,10 @@ New Features
 
   * Added independent enqueue feature.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
+
 * **Added IPv4 network order lookup in the FIB library.**
 
   A new flag field is introduced in ``rte_fib_conf`` structure.
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 6757af74bf..21cd5c5ae6 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -229,6 +229,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 04/22] event/cnxk: add CN20K device config
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 02/22] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
                               ` (18 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 05/22] event/cnxk: add CN20k event queue configuration
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (2 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 04/22] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
                               ` (17 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index c7af0fac11..49805dd91d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1251,7 +1251,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 904a9b022d..ba08fa2173 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 06/22] event/cnxk: add CN20K event port configuration
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (3 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                               ` (16 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  63 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 174 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  26 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 276 insertions(+), 54 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 49805dd91d..43bc6c0bac 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -99,7 +77,7 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -220,12 +198,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -504,18 +482,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -598,13 +565,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..611906a4f0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,87 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +140,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +242,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..5b6c558d5a
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	uint16_t xae_waes;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ba08fa2173..4066497e6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 311de3d92b..7a3262bcff 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 07/22] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (4 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                               ` (15 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh,
	Shijith Thotton, Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 384 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 611906a4f0..a5dd03de6e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -108,6 +109,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -265,8 +281,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..c7de493681
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
+	       ws->xae_waes;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
+
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 21cd5c5ae6..d0dc2320e1 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -232,6 +232,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 08/22] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (5 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
                               ` (14 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a5dd03de6e..d1668a00c1 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -114,11 +114,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 09/22] event/cnxk: add CN20K event port quiesce
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (6 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
                               ` (13 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d1668a00c1..56e3eb87fb 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -208,6 +208,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -265,6 +324,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 10/22] event/cnxk: add CN20K event port profile switch
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (7 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
                               ` (12 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 56e3eb87fb..53b0b43199 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -124,6 +124,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2dcde0b444..2c723523d2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -383,6 +383,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 11/22] event/cnxk: add CN20K event port preschedule
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (8 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 12/22] event/cnxk: add CN20K device start pbhagavatula
                               ` (11 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 53b0b43199..a788eeed63 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -125,6 +125,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2c723523d2..ebfe863bc5 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -394,6 +394,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 12/22] event/cnxk: add CN20K device start
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (9 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
                               ` (10 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 43bc6c0bac..f2e591f547 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -154,83 +154,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -640,24 +563,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -669,9 +574,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -692,8 +596,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a788eeed63..69c593ed60 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -87,6 +87,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -315,6 +370,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -333,6 +407,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 13/22] event/cnxk: add CN20K device stop and close
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (10 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 12/22] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                               ` (9 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 69c593ed60..6195b29705 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -389,6 +389,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -409,6 +428,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 14/22] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (11 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
                               ` (8 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index b03a62fe70..e97754bd47 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6195b29705..793098bd61 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -408,6 +408,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -427,9 +433,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7a3262bcff..8f3d0982e9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 15/22] event/cnxk: support CN20K Rx adapter
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (12 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                               ` (7 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 793098bd61..602fbd6359 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -414,6 +415,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -433,6 +545,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -509,4 +627,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 5b6c558d5a..7a6363a89e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
 	int64_t __rte_atomic *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 16/22] event/cnxk: support CN20K Rx adapter fast path
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (13 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
                               ` (6 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 602fbd6359..408014036a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -165,21 +168,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -286,7 +392,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -312,7 +419,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index ebfe863bc5..53daf3b4b0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -423,57 +423,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index d0dc2320e1..a2bafab268 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -234,6 +234,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 17/22] event/cnxk: support CN20K Tx adapter
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (14 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                               ` (5 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 408014036a..509c6ea630 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -168,6 +169,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -634,6 +664,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -659,6 +778,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 7a6363a89e..8ea2878fa5 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -25,6 +25,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t grp_base;
 	uint16_t xae_waes;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 18/22] event/cnxk: support CN20K Tx adapter fast path
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (15 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 19/22] common/cnxk: add SSO event aggregator pbhagavatula
                               ` (4 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 +++
 drivers/event/cnxk/cn20k_tx_worker.h         | 176 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 561 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 509c6ea630..5d49a5e5c6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -253,6 +256,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -285,6 +301,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -299,6 +321,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..c8ab560b0e 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,180 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index a2bafab268..8aaf8116f7 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -272,9 +272,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 19/22] common/cnxk: add SSO event aggregator
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (16 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
                               ` (3 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 63139b5517..db6e8f07b3 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2191,6 +2195,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2210,6 +2221,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index de748ac409..14ee6031e2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -500,6 +500,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 20/22] event/cnxk: add Rx/Tx event vector support
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (17 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 19/22] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 21/22] common/cnxk: update timer base code pbhagavatula
                               ` (2 subsequent siblings)
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 5d49a5e5c6..57e15b6d8c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -75,6 +75,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
 	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -595,7 +596,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -641,6 +643,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -657,7 +809,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -690,7 +842,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -704,7 +878,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -807,6 +982,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index c8ab560b0e..b09d845b09 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -139,10 +139,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -150,6 +198,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 4066497e6b..33b3538753 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 21/22] common/cnxk: update timer base code
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (18 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-25 13:03             ` [PATCH v7 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index db6e8f07b3..8c0e274684 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2803,6 +2806,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2895,13 +2899,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 14ee6031e2..e7381ae8b2 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -528,6 +528,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index f4c61dfb44..c5b3d67eb8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v7 22/22] event/cnxk: add CN20K timer adapter
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (19 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 21/22] common/cnxk: update timer base code pbhagavatula
@ 2024-10-25 13:03             ` pbhagavatula
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
  21 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-25 13:03 UTC (permalink / raw)
  To: jerinj, stephen, mattias.ronnblom, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  82 +++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 201 +++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 57e15b6d8c..d68700fc05 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -957,6 +957,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -991,6 +998,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1068,4 +1077,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 27a4dfb490..994d1d1090 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index c5b3d67eb8..114a89ee5a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5e96f6f188..42d376d375 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -77,6 +68,24 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
+
+	return index;
+}
+
 #define FP(_name, _f3, _f2, _f1, _flags)                                       \
 	uint16_t __rte_noinline cnxk_tim_arm_burst_##_name(                    \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -132,6 +141,29 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -174,6 +206,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = (uint64_t __rte_atomic *)&tim[i]->impl_opaque[1];
+		if (!rte_atomic_compare_exchange_strong_explicit(status, (uint64_t *)&tim[i], 0,
+								 rte_memory_order_release,
+								 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index e52eadbc08..be6744db51 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -573,6 +580,200 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t __rte_atomic *status;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = (uint64_t __rte_atomic *)&tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = (uint64_t __rte_atomic *)&tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic
+							      *)&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = (uint64_t __rte_atomic *)&tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic *)&tim[cnt + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 01/22] event/cnxk: use stdatomic API
  2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
                               ` (20 preceding siblings ...)
  2024-10-25 13:03             ` [PATCH v7 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-28 15:59             ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 02/22] common/cnxk: implement SSO HW info pbhagavatula
                                 ` (20 more replies)
  21 siblings, 21 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Replace gcc inbuilt __atomic_xxx intrinsics with rte_atomic_xxx API.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v2 Changes:
- Rebase and remove single dequeue and enqueue functions.
v3 Changes:
- Remove __atomic builtins.
v4 Changes:
- Rebase onto next-event tree.
v5 Changes:
- Rebase, shuffle release notes order.
v6 Changes:
- Remove unnecessary net/cnxk changes.
v7 Changes:
- Add depends on tag to make CI run.
v8 Changes:
- Rebase

 drivers/event/cnxk/cn10k_eventdev.c         |  6 +--
 drivers/event/cnxk/cn10k_eventdev.h         |  4 +-
 drivers/event/cnxk/cn10k_tx_worker.h        |  7 ++-
 drivers/event/cnxk/cn10k_worker.c           | 15 +++---
 drivers/event/cnxk/cn10k_worker.h           |  2 +-
 drivers/event/cnxk/cn9k_eventdev.c          |  8 +--
 drivers/event/cnxk/cn9k_worker.h            | 19 ++++---
 drivers/event/cnxk/cnxk_eventdev.h          |  4 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c | 60 ++++++++++-----------
 drivers/event/cnxk/cnxk_tim_evdev.c         |  4 +-
 drivers/event/cnxk/cnxk_tim_evdev.h         | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.c        | 10 ++--
 drivers/event/cnxk/cnxk_tim_worker.h        | 57 ++++++++++----------
 drivers/event/cnxk/cnxk_worker.h            |  3 +-
 14 files changed, 108 insertions(+), 101 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4edac33a84..4a2c88c8c6 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -94,9 +94,9 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	uint64_t val;

 	ws->grp_base = grp_base;
-	ws->fc_mem = (int64_t *)dev->fc_iova;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 	ws->xaq_lmt = dev->xaq_lmt;
-	ws->fc_cache_space = dev->fc_cache_space;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);

@@ -768,7 +768,7 @@ cn10k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 	for (i = 0; i < dev->nb_event_ports; i++) {
 		struct cn10k_sso_hws *ws = event_dev->data->ports[i];
 		ws->xaq_lmt = dev->xaq_lmt;
-		ws->fc_mem = (int64_t *)dev->fc_iova;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
 		ws->tstamp = dev->tstamp;
 		if (lookup_mem)
 			ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index 372121465c..b8395aa314 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -19,8 +19,8 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	uint64_t meta_aura;
 	/* Add Work Fastpath data */
-	alignas(RTE_CACHE_LINE_SIZE) int64_t *fc_mem;
-	int64_t *fc_cache_space;
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
 	int32_t xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 0695ea23e1..19cb2e22e5 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -51,7 +51,9 @@ cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -60,7 +62,8 @@ static __rte_always_inline int32_t
 cn10k_sso_sq_depth(const struct cn10k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index c49138316c..06ad7437d5 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -16,7 +16,7 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -80,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
-	return (ws->xaq_lmt - __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED)) *
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
 	       ROC_SSO_XAE_PER_XAQ;
 }

@@ -90,19 +90,20 @@ sso_lmt_aw_wait_fc(struct cn10k_sso_hws *ws, int64_t req)
 	int64_t cached, refill;

 retry:
-	while (__atomic_load_n(ws->fc_cache_space, __ATOMIC_RELAXED) < 0)
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
 		;

-	cached = __atomic_fetch_sub(ws->fc_cache_space, req, __ATOMIC_ACQUIRE) - req;
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
 	/* Check if there is enough space, else update and retry. */
 	if (cached < 0) {
 		/* Check if we have space else retry. */
 		do {
 			refill = sso_read_xaq_space(ws);
 		} while (refill <= 0);
-		__atomic_compare_exchange(ws->fc_cache_space, &cached, &refill,
-					  0, __ATOMIC_RELEASE,
-					  __ATOMIC_RELAXED);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
 		goto retry;
 	}
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 5d3394508e..954dee5a2a 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -311,7 +311,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		roc_load_pair(gw.u64[0], gw.u64[1],
 			      ws->base + SSOW_LF_GWS_WQE0);
 	} while (gw.u64[0] & BIT_ULL(63));
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 #endif
 	ws->gw_rdata = gw.u64[0];
 	if (gw.u64[1])
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index b176044aa5..05e237c005 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -74,7 +74,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	if (dev->dual_ws) {
 		dws = hws;
 		dws->grp_base = grp_base;
-		dws->fc_mem = (uint64_t *)dev->fc_iova;
+		dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		dws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, dws->base[0] + SSOW_LF_GWS_NW_TIM);
@@ -82,7 +82,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	} else {
 		ws = hws;
 		ws->grp_base = grp_base;
-		ws->fc_mem = (uint64_t *)dev->fc_iova;
+		ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 		ws->xaq_lmt = dev->xaq_lmt;

 		plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
@@ -822,14 +822,14 @@ cn9k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
 			struct cn9k_sso_hws_dual *dws =
 				event_dev->data->ports[i];
 			dws->xaq_lmt = dev->xaq_lmt;
-			dws->fc_mem = (uint64_t *)dev->fc_iova;
+			dws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			dws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				dws->lookup_mem = lookup_mem;
 		} else {
 			struct cn9k_sso_hws *ws = event_dev->data->ports[i];
 			ws->xaq_lmt = dev->xaq_lmt;
-			ws->fc_mem = (uint64_t *)dev->fc_iova;
+			ws->fc_mem = (uint64_t __rte_atomic *)dev->fc_iova;
 			ws->tstamp = dev->tstamp;
 			if (lookup_mem)
 				ws->lookup_mem = lookup_mem;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 064cdfe94a..f07b8a9bff 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -28,7 +28,7 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (ws->xaq_lmt <= *ws->fc_mem)
 		return 0;

@@ -71,7 +71,7 @@ cn9k_sso_hws_new_event_wait(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (ws->xaq_lmt <= __atomic_load_n(ws->fc_mem, __ATOMIC_RELAXED))
+	while (ws->xaq_lmt <= rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -93,7 +93,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(ws->base);
 		cn9k_sso_hws_new_event_wait(ws, ev);
 	}
@@ -110,7 +110,7 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	rte_atomic_thread_fence(__ATOMIC_ACQ_REL);
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	if (dws->xaq_lmt <= *dws->fc_mem)
 		return 0;

@@ -128,7 +128,7 @@ cn9k_sso_hws_dual_new_event_wait(struct cn9k_sso_hws_dual *dws,
 	const uint64_t event_ptr = ev->u64;
 	const uint16_t grp = ev->queue_id;

-	while (dws->xaq_lmt <= __atomic_load_n(dws->fc_mem, __ATOMIC_RELAXED))
+	while (dws->xaq_lmt <= rte_atomic_load_explicit(dws->fc_mem, rte_memory_order_relaxed))
 		;

 	cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
@@ -151,7 +151,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
 		 * Use add_work operation to transfer the event to
 		 * new group/core
 		 */
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		roc_sso_hws_head_wait(base);
 		cn9k_sso_hws_dual_new_event_wait(dws, ev);
 	}
@@ -571,7 +571,9 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
 		     : "memory");
 #else
 	do {
-		avail = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
 	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
 #endif
 }
@@ -740,7 +742,8 @@ static __rte_always_inline int32_t
 cn9k_sso_sq_depth(const struct cn9k_eth_txq *txq)
 {
 	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
-			(int32_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
 	return (avail << txq->sqes_per_sqb_log2) - avail;
 }

diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index f147ef3c78..982bbb6a9b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,7 +136,7 @@ struct __rte_cache_aligned cn9k_sso_hws {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
@@ -154,7 +154,7 @@ struct __rte_cache_aligned cn9k_sso_hws_dual {
 	struct cnxk_timesync_info **tstamp;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t xaq_lmt;
-	uint64_t *fc_mem;
+	uint64_t __rte_atomic *fc_mem;
 	uintptr_t grp_base;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uint64_t lso_tun_fmt;
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 95c0f1b1f7..a4615c1356 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -63,7 +63,7 @@ seqn_list_update(int val)
 		return -1;

 	seqn_list[seqn_list_index++] = val;
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	return 0;
 }

@@ -82,7 +82,7 @@ seqn_list_check(int limit)
 }

 struct test_core_param {
-	uint32_t *total_events;
+	uint32_t __rte_atomic *total_events;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port;
 	uint8_t sched_type;
@@ -540,13 +540,13 @@ static int
 worker_multi_port_fn(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;
 	int ret;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;
@@ -554,30 +554,30 @@ worker_multi_port_fn(void *arg)
 		ret = validate_event(&ev);
 		RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event");
 		rte_pktmbuf_free(ev.mbuf);
-		__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 	}

 	return 0;
 }

 static inline int
-wait_workers_to_join(const uint32_t *count)
+wait_workers_to_join(const uint32_t __rte_atomic *count)
 {
 	uint64_t cycles, print_cycles;

 	cycles = rte_get_timer_cycles();
 	print_cycles = cycles;
-	while (__atomic_load_n(count, __ATOMIC_RELAXED)) {
+	while (rte_atomic_load_explicit(count, rte_memory_order_relaxed)) {
 		uint64_t new_cycles = rte_get_timer_cycles();

 		if (new_cycles - print_cycles > rte_get_timer_hz()) {
 			plt_info("Events %d",
-				 __atomic_load_n(count, __ATOMIC_RELAXED));
+				 rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			print_cycles = new_cycles;
 		}
 		if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) {
 			plt_err("No schedules for seconds, deadlock (%d)",
-				__atomic_load_n(count, __ATOMIC_RELAXED));
+				rte_atomic_load_explicit(count, rte_memory_order_relaxed));
 			rte_event_dev_dump(evdev, stdout);
 			cycles = new_cycles;
 			return -1;
@@ -593,7 +593,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 			int (*worker_thread)(void *), uint32_t total_events,
 			uint8_t nb_workers, uint8_t sched_type)
 {
-	uint32_t atomic_total_events;
+	uint32_t __rte_atomic atomic_total_events;
 	struct test_core_param *param;
 	uint64_t dequeue_tmo_ticks;
 	uint8_t port = 0;
@@ -603,7 +603,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 	if (!nb_workers)
 		return 0;

-	__atomic_store_n(&atomic_total_events, total_events, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&atomic_total_events, total_events, rte_memory_order_relaxed);
 	seqn_list_init();

 	param = malloc(sizeof(struct test_core_param) * nb_workers);
@@ -640,7 +640,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		param[port].sched_type = sched_type;
 		param[port].port = port;
 		param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
-		rte_atomic_thread_fence(__ATOMIC_RELEASE);
+		rte_atomic_thread_fence(rte_memory_order_release);
 		w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
 		if (w_lcore == RTE_MAX_LCORE) {
 			plt_err("Failed to get next available lcore");
@@ -651,7 +651,7 @@ launch_workers_and_wait(int (*main_thread)(void *),
 		rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
 	}

-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 	ret = wait_workers_to_join(&atomic_total_events);
 	free(param);

@@ -890,13 +890,13 @@ worker_flow_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -916,8 +916,8 @@ worker_flow_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1046,13 +1046,13 @@ worker_group_based_pipeline(void *arg)
 {
 	struct test_core_param *param = arg;
 	uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t new_sched_type = param->sched_type;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1,
 						      dequeue_tmo_ticks);
 		if (!valid_event)
@@ -1072,8 +1072,8 @@ worker_group_based_pipeline(void *arg)

 			if (seqn_list_update(seqn) == 0) {
 				rte_pktmbuf_free(ev.mbuf);
-				__atomic_fetch_sub(total_events, 1,
-						   __ATOMIC_RELAXED);
+				rte_atomic_fetch_sub_explicit(total_events, 1,
+							      rte_memory_order_relaxed);
 			} else {
 				plt_err("Failed to update seqn_list");
 				return -1;
@@ -1205,19 +1205,19 @@ static int
 worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg)
 {
 	struct test_core_param *param = arg;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;
 	uint8_t port = param->port;
 	uint16_t valid_event;
 	struct rte_event ev;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.sub_event_type == MAX_STAGES) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.sub_event_type++;
@@ -1284,16 +1284,16 @@ worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
@@ -1329,16 +1329,16 @@ worker_mixed_pipeline_max_stages_rand_sched_type(void *arg)
 				       &queue_count),
 		"Queue count get failed");
 	uint8_t nr_queues = queue_count;
-	uint32_t *total_events = param->total_events;
+	uint32_t __rte_atomic *total_events = param->total_events;

-	while (__atomic_load_n(total_events, __ATOMIC_RELAXED) > 0) {
+	while (rte_atomic_load_explicit(total_events, rte_memory_order_relaxed) > 0) {
 		valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0);
 		if (!valid_event)
 			continue;

 		if (ev.queue_id == nr_queues - 1) { /* Last stage */
 			rte_pktmbuf_free(ev.mbuf);
-			__atomic_fetch_sub(total_events, 1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(total_events, 1, rte_memory_order_relaxed);
 		} else {
 			ev.event_type = RTE_EVENT_TYPE_CPU;
 			ev.queue_id++;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index bba70646fa..74a6da5070 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -358,7 +358,7 @@ cnxk_tim_stats_get(const struct rte_event_timer_adapter *adapter,
 		tim_ring->tick_fn(tim_ring->tbase) - tim_ring->ring_start_cyc;

 	stats->evtim_exp_count =
-		__atomic_load_n(&tim_ring->arm_cnt, __ATOMIC_RELAXED);
+		rte_atomic_load_explicit(&tim_ring->arm_cnt, rte_memory_order_relaxed);
 	stats->ev_enq_count = stats->evtim_exp_count;
 	stats->adapter_tick_count =
 		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div);
@@ -370,7 +370,7 @@ cnxk_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
 {
 	struct cnxk_tim_ring *tim_ring = adapter->data->adapter_priv;

-	__atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tim_ring->arm_cnt, 0, rte_memory_order_relaxed);
 	return 0;
 }

diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index 6cf10dbf4d..f4c61dfb44 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -108,15 +108,15 @@ struct cnxk_tim_evdev {
 struct cnxk_tim_bkt {
 	uint64_t first_chunk;
 	union {
-		uint64_t w1;
+		uint64_t __rte_atomic w1;
 		struct {
-			uint32_t nb_entry;
+			uint32_t __rte_atomic nb_entry;
 			uint8_t sbt : 1;
 			uint8_t hbt : 1;
 			uint8_t bsk : 1;
 			uint8_t rsvd : 5;
-			uint8_t lock;
-			int16_t chunk_remainder;
+			uint8_t __rte_atomic lock;
+			int16_t __rte_atomic chunk_remainder;
 		};
 	};
 	uint64_t current_chunk;
@@ -134,7 +134,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	struct rte_reciprocal_u64 fast_div;
 	struct rte_reciprocal_u64 fast_bkt;
 	uint64_t tck_int;
-	uint64_t arm_cnt;
+	uint64_t __rte_atomic arm_cnt;
 	uintptr_t base;
 	uint8_t prod_type_sp;
 	uint8_t enable_stats;
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 1f2f2fe5d8..db31f91818 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -70,7 +70,7 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, index, rte_memory_order_relaxed);

 	return index;
 }
@@ -124,8 +124,8 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	}

 	if (flags & CNXK_TIM_ENA_STATS)
-		__atomic_fetch_add(&tim_ring->arm_cnt, set_timers,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_add_explicit(&tim_ring->arm_cnt, set_timers,
+					      rte_memory_order_relaxed);

 	return set_timers;
 }
@@ -151,7 +151,7 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	int ret;

 	RTE_SET_USED(adptr);
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 	for (index = 0; index < nb_timers; index++) {
 		if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) {
 			rte_errno = EALREADY;
@@ -193,7 +193,7 @@ cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 		return -ENOENT;

 	bkt = (struct cnxk_tim_bkt *)evtim->impl_opaque[1];
-	sema = __atomic_load_n(&bkt->w1, rte_memory_order_acquire);
+	sema = rte_atomic_load_explicit(&bkt->w1, rte_memory_order_acquire);
 	if (cnxk_tim_bkt_get_hbt(sema) || !cnxk_tim_bkt_get_nent(sema))
 		return -ENOENT;

diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index f530d8c5c4..e52eadbc08 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -23,19 +23,19 @@ cnxk_tim_bkt_fetch_rem(uint64_t w1)
 static inline int16_t
 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&bktp->chunk_remainder, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
 {
-	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->chunk_remainder, v, rte_memory_order_relaxed);
 }

 static inline uint8_t
@@ -56,20 +56,20 @@ cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
 	/* Clear everything except lock. */
 	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
-				  __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
+					     rte_memory_order_acquire);
 }

 static inline uint64_t
 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
 {
-	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, TIM_BUCKET_SEMA, rte_memory_order_relaxed);
 }

 static inline uint64_t
@@ -77,19 +77,19 @@ cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
 {
 	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

-	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
+	return rte_atomic_fetch_add_explicit(&bktp->w1, v, rte_memory_order_acquire);
 }

 static inline void
 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_release);
 }

 static inline void
 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_sub_explicit(&bktp->lock, 1, rte_memory_order_relaxed);
 }

 static inline uint32_t
@@ -102,19 +102,19 @@ cnxk_tim_bkt_get_nent(uint64_t w1)
 static inline void
 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
 {
-	__atomic_fetch_add(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, 1, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent_relaxed(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELAXED);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_relaxed);
 }

 static inline void
 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
 {
-	__atomic_fetch_add(&bktp->nb_entry, v, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&bktp->nb_entry, v, rte_memory_order_release);
 }

 static inline uint64_t
@@ -123,7 +123,7 @@ cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
 	const uint64_t v =
 		~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

-	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL) & v;
+	return rte_atomic_fetch_and_explicit(&bktp->w1, v, rte_memory_order_acq_rel) & v;
 }

 static inline uint64_t
@@ -273,8 +273,8 @@ cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -356,8 +356,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -385,8 +385,8 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 			     : [crem] "r"(&bkt->w1)
 			     : "memory");
 #else
-		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
-		       0)
+		while (rte_atomic_load_explicit((int64_t __rte_atomic *)&bkt->w1,
+						rte_memory_order_relaxed) < 0)
 			;
 #endif
 		goto __retry;
@@ -408,15 +408,14 @@ cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
 		*chunk = *pent;
 		if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
 			do {
-				lock_sema = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				lock_sema = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
 		}
-		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+		rte_atomic_thread_fence(rte_memory_order_acquire);
 		mirr_bkt->current_chunk = (uintptr_t)chunk;
-		__atomic_store_n(&bkt->chunk_remainder,
-				 tim_ring->nb_chunk_slots - 1,
-				 __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&bkt->chunk_remainder, tim_ring->nb_chunk_slots - 1,
+					  rte_memory_order_release);
 	} else {
 		chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
 		chunk += tim_ring->nb_chunk_slots - rem;
@@ -489,8 +488,8 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 				     : "memory");
 #else
 			do {
-				hbt_state = __atomic_load_n(&bkt->w1,
-							    __ATOMIC_RELAXED);
+				hbt_state = rte_atomic_load_explicit(&bkt->w1,
+								     rte_memory_order_relaxed);
 			} while (hbt_state & BIT_ULL(33));
 #endif

@@ -521,7 +520,7 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 			     : [lock] "r"(&bkt->lock)
 			     : "memory");
 #else
-		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
+		while (rte_atomic_load_explicit(&bkt->lock, rte_memory_order_relaxed))
 			;
 #endif
 		goto __retry;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 0e0d728ba4..3592344e04 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -33,7 +33,8 @@ cnxk_sso_hws_swtag_desched(uint32_t tag, uint8_t new_tt, uint16_t grp,
 	uint64_t val;

 	val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
-	__atomic_store_n((uint64_t *)swtag_desched_op, val, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit((uint64_t __rte_atomic *)swtag_desched_op, val,
+				  rte_memory_order_release);
 }

 static __rte_always_inline void
--
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 02/22] common/cnxk: implement SSO HW info
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
                                 ` (19 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Ankur Dwivedi, Anoob Joseph,
	Tejasree Kondoj, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HW info mbox to get hardware capabilities, and reuse
them instead of depending on hardcoded values.
Remove redundant includes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_mbox.h              | 28 ++++++++++
 drivers/common/cnxk/roc_sso.c               | 58 ++++++++++++++++++---
 drivers/common/cnxk/roc_sso.h               |  9 ++--
 drivers/common/cnxk/version.map             |  1 +
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c   |  5 +-
 drivers/crypto/cnxk/cn9k_cryptodev_ops.c    |  9 +---
 drivers/event/cnxk/cn10k_eventdev.c         |  1 +
 drivers/event/cnxk/cn10k_eventdev.h         |  1 +
 drivers/event/cnxk/cn10k_worker.c           |  6 ++-
 drivers/event/cnxk/cnxk_eventdev.c          |  4 +-
 drivers/event/cnxk/cnxk_eventdev.h          |  3 --
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  2 +
 drivers/event/cnxk/cnxk_eventdev_stats.c    |  2 +
 drivers/event/cnxk/cnxk_tim_evdev.c         |  2 +-
 drivers/event/cnxk/cnxk_tim_worker.c        |  2 +
 drivers/event/cnxk/cnxk_worker.c            |  4 +-
 16 files changed, 103 insertions(+), 34 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 0864c0e8c0..645da563c2 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,7 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
 	  tim_lf_alloc_rsp)                                                    \
@@ -2117,6 +2118,33 @@ struct ssow_chng_mship {
 	uint16_t __io hwgrps[MAX_RVU_BLKLF_CNT]; /* Array of hwgrps. */
 };
 
+struct sso_feat_info {
+	uint8_t __io hw_flr : 1;
+	uint8_t __io hw_prefetch : 1;
+	uint8_t __io sw_prefetch : 1;
+	uint8_t __io lsw : 1;
+	uint8_t __io fwd_grp : 1;
+	uint8_t __io eva_present : 1;
+	uint8_t __io no_nsched : 1;
+	uint8_t __io tag_cfg : 1;
+	uint8_t __io gwc_per_core;
+	uint16_t __io hws;
+	uint16_t __io hwgrps;
+	uint16_t __io hwgrps_per_pf;
+	uint16_t __io iue;
+	uint16_t __io taq_lines;
+	uint16_t __io taq_ent_per_line;
+	uint16_t __io xaq_buf_size;
+	uint16_t __io xaq_wq_entries;
+	uint32_t __io eva_ctx_per_hwgrp;
+	uint64_t __io rsvd[2];
+};
+
+struct sso_hw_info {
+	struct mbox_msghdr hdr;
+	struct sso_feat_info feat;
+};
+
 struct sso_hw_setconfig {
 	struct mbox_msghdr hdr;
 	uint32_t __io npa_aura_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 2e3b134bfc..8a219b985b 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -191,7 +191,7 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 		goto exit;
 	}
 
-	roc_sso->max_hwgrp = rsrc_cnt->sso;
+	roc_sso->max_hwgrp = PLT_MIN(rsrc_cnt->sso, roc_sso->feat.hwgrps_per_pf);
 	roc_sso->max_hws = rsrc_cnt->ssow;
 
 	rc = 0;
@@ -200,6 +200,37 @@ sso_rsrc_get(struct roc_sso *roc_sso)
 	return rc;
 }
 
+static int
+sso_hw_info_get(struct roc_sso *roc_sso)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct sso_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_sso_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc == MBOX_MSG_INVALID) {
+		roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	} else {
+		mbox_memcpy(&roc_sso->feat, &rsp->feat, sizeof(roc_sso->feat));
+
+		if (!roc_sso->feat.hwgrps_per_pf)
+			roc_sso->feat.hwgrps_per_pf = ROC_SSO_MAX_HWGRP_PER_PF;
+	}
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 void
 sso_hws_link_modify(uint8_t hws, uintptr_t base, struct plt_bitmap *bmp, uint16_t hwgrp[],
 		    uint16_t n, uint8_t set, uint16_t enable)
@@ -319,6 +350,12 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t hwgrp)
 	return dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | hwgrp << 12);
 }
 
+uint16_t
+roc_sso_pf_func_get(void)
+{
+	return idev_sso_pffunc_get();
+}
+
 uint64_t
 roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
@@ -670,9 +707,8 @@ roc_sso_hwgrp_init_xaq_aura(struct roc_sso *roc_sso, uint32_t nb_xae)
 	struct dev *dev = &sso->dev;
 	int rc;
 
-	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae,
-				     roc_sso->xae_waes, roc_sso->xaq_buf_size,
-				     roc_sso->nb_hwgrp);
+	rc = sso_hwgrp_init_xaq_aura(dev, &roc_sso->xaq, nb_xae, roc_sso->feat.xaq_wq_entries,
+				     roc_sso->feat.xaq_buf_size, roc_sso->nb_hwgrp);
 	return rc;
 }
 
@@ -953,9 +989,11 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 		goto hwgrp_alloc_fail;
 	}
 
-	roc_sso->xaq_buf_size = rsp_hwgrp->xaq_buf_size;
-	roc_sso->xae_waes = rsp_hwgrp->xaq_wq_entries;
-	roc_sso->iue = rsp_hwgrp->in_unit_entries;
+	if (!roc_sso->feat.xaq_buf_size || !roc_sso->feat.xaq_wq_entries || !roc_sso->feat.iue) {
+		roc_sso->feat.xaq_buf_size = rsp_hwgrp->xaq_buf_size;
+		roc_sso->feat.xaq_wq_entries = rsp_hwgrp->xaq_wq_entries;
+		roc_sso->feat.iue = rsp_hwgrp->in_unit_entries;
+	}
 
 	rc = sso_msix_fill(roc_sso, nb_hws, nb_hwgrp);
 	if (rc < 0) {
@@ -1059,6 +1097,12 @@ roc_sso_dev_init(struct roc_sso *roc_sso)
 		goto fail;
 	}
 
+	rc = sso_hw_info_get(roc_sso);
+	if (rc < 0) {
+		plt_err("Failed to get SSO HW info");
+		goto fail;
+	}
+
 	rc = sso_rsrc_get(roc_sso);
 	if (rc < 0) {
 		plt_err("Failed to get SSO resources");
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 4ac901762e..021db22c86 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -8,7 +8,7 @@
 #include "hw/ssow.h"
 
 #define ROC_SSO_AW_PER_LMT_LINE_LOG2 3
-#define ROC_SSO_XAE_PER_XAQ	     352
+#define ROC_SSO_MAX_HWGRP_PER_PF     256
 
 struct roc_sso_hwgrp_qos {
 	uint16_t hwgrp;
@@ -57,9 +57,7 @@ struct roc_sso {
 	uintptr_t lmt_base;
 	struct roc_sso_xaq_data xaq;
 	/* HW Const. */
-	uint32_t xae_waes;
-	uint32_t xaq_buf_size;
-	uint32_t iue;
+	struct sso_feat_info feat;
 	/* Private data. */
 #define ROC_SSO_MEM_SZ (16 * 1024)
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
@@ -103,6 +101,9 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
 
+/* Utility function */
+uint16_t __roc_api roc_sso_pf_func_get(void);
+
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 935be3584c..efb5e44da8 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -517,6 +517,7 @@ INTERNAL {
 	roc_sso_hws_gwc_invalidate;
 	roc_sso_hws_unlink;
 	roc_sso_ns_to_gw;
+	roc_sso_pf_func_get;
 	roc_sso_rsrc_fini;
 	roc_sso_rsrc_init;
 	roc_tim_fini;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 88ea032bcb..dbebc5aef1 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -11,10 +11,7 @@
 
 #include <ethdev_driver.h>
 
-#include "roc_cpt.h"
-#include "roc_idev.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn10k_cryptodev.h"
 #include "cn10k_cryptodev_event_dp.h"
diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
index ae00af5019..8d10bc9f9b 100644
--- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
@@ -8,14 +8,7 @@
 #include <rte_ip.h>
 #include <rte_vect.h>
 
-#include "roc_cpt.h"
-#if defined(__aarch64__)
-#include "roc_io.h"
-#else
-#include "roc_io_generic.h"
-#endif
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 4a2c88c8c6..c7af0fac11 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -64,6 +64,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
 
 	return ws;
 }
diff --git a/drivers/event/cnxk/cn10k_eventdev.h b/drivers/event/cnxk/cn10k_eventdev.h
index b8395aa314..4f0eab8acb 100644
--- a/drivers/event/cnxk/cn10k_eventdev.h
+++ b/drivers/event/cnxk/cn10k_eventdev.h
@@ -23,6 +23,7 @@ struct __rte_cache_aligned cn10k_sso_hws {
 	int64_t __rte_atomic *fc_cache_space;
 	uintptr_t aw_lmt;
 	uintptr_t grp_base;
+	uint16_t xae_waes;
 	int32_t xaq_lmt;
 	/* Tx Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 06ad7437d5..80077ec8a1 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cn10k_worker.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -81,7 +83,7 @@ static inline int32_t
 sso_read_xaq_space(struct cn10k_sso_hws *ws)
 {
 	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
-	       ROC_SSO_XAE_PER_XAQ;
+		ws->xae_waes;
 }
 
 static inline void
@@ -394,7 +396,7 @@ cn10k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
 	int32_t space;
 
 	/* Do a common back-pressure check and return */
-	space = sso_read_xaq_space(ws) - ROC_SSO_XAE_PER_XAQ;
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
 	if (space <= 0)
 		return 0;
 	nb_events = space < nb_events ? space : nb_events;
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84a55511a3..ab7420ab79 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -2,7 +2,7 @@
  * Copyright(C) 2021 Marvell.
  */
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
@@ -47,7 +47,7 @@ cnxk_sso_xaq_allocate(struct cnxk_sso_evdev *dev)
 	if (dev->num_events > 0)
 		xae_cnt = dev->num_events;
 	else
-		xae_cnt = dev->sso.iue;
+		xae_cnt = dev->sso.feat.iue;
 
 	if (dev->xae_cnt)
 		xae_cnt += dev->xae_cnt;
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 982bbb6a9b..904a9b022d 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -21,9 +21,6 @@
 
 #include "cnxk_eventdev_dp.h"
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-
 #include "cnxk_tim_evdev.h"
 
 #define CNXK_SSO_XAE_CNT   "xae_cnt"
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index a4615c1356..311de3d92b 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -18,6 +18,8 @@
 #include <rte_random.h>
 #include <rte_test.h>
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_eventdev_stats.c b/drivers/event/cnxk/cnxk_eventdev_stats.c
index a8a87a06e4..6dea91aedf 100644
--- a/drivers/event/cnxk/cnxk_eventdev_stats.c
+++ b/drivers/event/cnxk/cnxk_eventdev_stats.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_eventdev.h"
 #include "cnxk_eventdev_dp.h"
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 74a6da5070..27a4dfb490 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -4,7 +4,7 @@
 
 #include <math.h>
 
-#include "roc_npa.h"
+#include "roc_api.h"
 
 #include "cnxk_eventdev.h"
 #include "cnxk_tim_evdev.h"
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index db31f91818..5e96f6f188 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -2,6 +2,8 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include "roc_api.h"
+
 #include "cnxk_tim_evdev.h"
 #include "cnxk_tim_worker.h"
 
diff --git a/drivers/event/cnxk/cnxk_worker.c b/drivers/event/cnxk/cnxk_worker.c
index 60876abcff..a07c9185d9 100644
--- a/drivers/event/cnxk/cnxk_worker.c
+++ b/drivers/event/cnxk/cnxk_worker.c
@@ -6,9 +6,7 @@
 #include <rte_pmd_cnxk_eventdev.h>
 #include <rte_eventdev.h>
 
-#include "roc_platform.h"
-#include "roc_sso.h"
-#include "roc_sso_dp.h"
+#include "roc_api.h"
 
 struct pwords {
 	uint64_t u[5];
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 03/22] event/cnxk: add CN20K specific device probe
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 02/22] common/cnxk: implement SSO HW info pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 04/22] event/cnxk: add CN20K device config pbhagavatula
                                 ` (18 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton, Nithin Dabilpuram,
	Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra,
	Anatoly Burakov
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add platform specific event device probe and remove, also add
event device info get function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 doc/guides/eventdevs/cnxk.rst          | 23 ++++---
 doc/guides/rel_notes/release_24_11.rst |  4 ++
 drivers/common/cnxk/roc_sso.c          | 10 ++-
 drivers/event/cnxk/cn20k_eventdev.c    | 93 ++++++++++++++++++++++++++
 drivers/event/cnxk/meson.build         |  8 ++-
 5 files changed, 124 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.c

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index e21846f4e0..55028f889b 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -16,6 +16,7 @@ Supported OCTEON cnxk SoCs
 
 - CN9XX
 - CN10XX
+- CN20XX
 
 Features
 --------
@@ -36,7 +37,7 @@ Features of the OCTEON cnxk SSO PMD are:
   DRAM
 - HW accelerated dequeue timeout support to enable power management
 - HW managed event timers support through TIM, with high precision and
-  time granularity of 2.5us on CN9K and 1us on CN10K.
+  time granularity of 2.5us on CN9K and 1us on CN10K/CN20K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
@@ -45,8 +46,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``RTE_ETH_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
-- HW managed event vectorization on CN10K for packets enqueued from ethdev to
-  eventdev configurable per each Rx queue in Rx adapter.
+- HW managed event vectorization on CN10K/CN20K for packets enqueued from ethdev
+  to eventdev configurable per each Rx queue in Rx adapter.
 - Event vector transmission via Tx adapter.
 - Up to 2 event link profiles.
 
@@ -93,13 +94,13 @@ Runtime Config Options
 
     -a 0002:0e:00.0,qos=[1-50-50]
 
-- ``CN10K WQE stashing support``
+- ``CN10K/CN20K WQE stashing support``
 
-  CN10K supports stashing the scheduled WQE carried by `rte_event` to the
-  cores L2 Dcache. The number of cache lines to be stashed and the offset
-  is configurable per HWGRP i.e. event queue. The dictionary format is as
-  follows `[Qx|stash_offset|stash_length]` here the stash offset can be
-  a negative integer.
+  CN10K/CN20K supports stashing the scheduled WQE carried by `rte_event`
+  to the cores L2 Dcache. The number of cache lines to be stashed and the
+  offset is configurable per HWGRP i.e. event queue. The dictionary format
+  is as follows `[Qx|stash_offset|stash_length]` here the stash offset can
+  be a negative integer.
   By default, stashing is enabled on queues which have been connected to
   Rx adapter. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed.
 
@@ -188,8 +189,8 @@ Runtime Config Options
 
     -a 0002:0e:00.0,tim_eclk_freq=122880000-1000000000-0
 
-Power Saving on CN10K
----------------------
+Power Saving on CN10K/CN20K
+---------------------------
 
 ARM cores can additionally use WFE when polling for transactions on SSO bus
 to save power i.e., in the event dequeue call ARM core can enter WFE and exit
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 53a5ffebe5..70a13ef958 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -235,6 +235,10 @@ New Features
 
   * Added independent enqueue feature.
 
+* **Updated Marvell cnxk event device driver.**
+
+  * Added eventdev driver support for CN20K SoC.
+
 * **Added IPv4 network order lookup in the FIB library.**
 
   A new flag field is introduced in ``rte_fib_conf`` structure.
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 8a219b985b..45cf6fc39e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -870,7 +870,10 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, uint16_t sso_vec_cnt)
 	if (idev == NULL)
 		return -ENODEV;
 
-	mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
+	if (roc_model_is_cn20k())
+		mbox_vec_cnt = RVU_MBOX_PF_INT_VEC_AFPF_MBOX + 1;
+	else
+		mbox_vec_cnt = RVU_PF_INT_VEC_AFPF_MBOX + 1;
 
 	/* Allocating vectors for the first time */
 	if (plt_intr_max_intr_get(pci_dev->intr_handle) == 0) {
@@ -1017,7 +1020,10 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t nb_hwgrp, ui
 	}
 
 	/* 2 error interrupt per TIM LF */
-	sso_vec_cnt += 2 * nb_tim_lfs;
+	if (roc_model_is_cn20k())
+		sso_vec_cnt += 3 * nb_tim_lfs;
+	else
+		sso_vec_cnt += 2 * nb_tim_lfs;
 
 	rc = sso_update_msix_vec_count(roc_sso, sso_vec_cnt);
 	if (rc < 0) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
new file mode 100644
index 0000000000..c4b80f64f3
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include "roc_api.h"
+
+#include "cnxk_eventdev.h"
+
+static void
+cn20k_sso_set_rsrc(void *arg)
+{
+	struct cnxk_sso_evdev *dev = arg;
+
+	dev->max_event_ports = dev->sso.max_hws;
+	dev->max_event_queues = dev->sso.max_hwgrp > RTE_EVENT_MAX_QUEUES_PER_DEV ?
+					RTE_EVENT_MAX_QUEUES_PER_DEV :
+					dev->sso.max_hwgrp;
+}
+
+static void
+cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	dev_info->driver_name = RTE_STR(EVENTDEV_NAME_CN20K_PMD);
+	cnxk_sso_info_get(dev, dev_info);
+	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
+}
+
+static struct eventdev_ops cn20k_sso_dev_ops = {
+	.dev_infos_get = cn20k_sso_info_get,
+};
+
+static int
+cn20k_sso_init(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = roc_plt_init();
+	if (rc < 0) {
+		plt_err("Failed to initialize platform model");
+		return rc;
+	}
+
+	event_dev->dev_ops = &cn20k_sso_dev_ops;
+	/* For secondary processes, the primary has done all the work */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	rc = cnxk_sso_init(event_dev);
+	if (rc < 0)
+		return rc;
+
+	cn20k_sso_set_rsrc(cnxk_sso_pmd_priv(event_dev));
+	if (!dev->max_event_ports || !dev->max_event_queues) {
+		plt_err("Not enough eventdev resource queues=%d ports=%d", dev->max_event_queues,
+			dev->max_event_ports);
+		cnxk_sso_fini(event_dev);
+		return -ENODEV;
+	}
+
+	plt_sso_dbg("Initializing %s max_queues=%d max_ports=%d", event_dev->data->name,
+		    dev->max_event_queues, dev->max_event_ports);
+
+	return 0;
+}
+
+static int
+cn20k_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+	return rte_event_pmd_pci_probe(pci_drv, pci_dev, sizeof(struct cnxk_sso_evdev),
+				       cn20k_sso_init);
+}
+
+static const struct rte_pci_id cn20k_pci_sso_map[] = {
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_PF),
+	CNXK_PCI_ID(PCI_SUBSYSTEM_DEVID_CN20KA, PCI_DEVID_CNXK_RVU_SSO_TIM_VF),
+	{
+		.vendor_id = 0,
+	},
+};
+
+static struct rte_pci_driver cn20k_pci_sso = {
+	.id_table = cn20k_pci_sso_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
+	.probe = cn20k_sso_probe,
+	.remove = cnxk_sso_remove,
+};
+
+RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
+RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
+RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 6757af74bf..21cd5c5ae6 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -14,7 +14,7 @@ else
         soc_type = platform
 endif
 
-if soc_type != 'cn9k' and soc_type != 'cn10k'
+if soc_type != 'cn9k' and soc_type != 'cn10k' and soc_type != 'cn20k'
         soc_type = 'all'
 endif
 
@@ -229,6 +229,12 @@ sources += files(
 endif
 endif
 
+if soc_type == 'cn20k' or soc_type == 'all'
+sources += files(
+        'cn20k_eventdev.c',
+)
+endif
+
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
 if cc.get_id() == 'clang'
         extra_flags += ['-Wno-asm-operand-widths']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 04/22] event/cnxk: add CN20K device config
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 02/22] common/cnxk: implement SSO HW info pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
                                 ` (17 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event device configuration that attaches the requested
number of SSO HWS(event ports) and HWGRP(event queues) LFs to
the RVU PF/VF.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 36 +++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index c4b80f64f3..753a976cd3 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -17,6 +17,17 @@ cn20k_sso_set_rsrc(void *arg)
 					dev->sso.max_hwgrp;
 }
 
+static int
+cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
+{
+	struct cnxk_tim_evdev *tim_dev = cnxk_tim_priv_get();
+	struct cnxk_sso_evdev *dev = arg;
+	uint16_t nb_tim_lfs;
+
+	nb_tim_lfs = tim_dev ? tim_dev->nb_rings : 0;
+	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -27,8 +38,33 @@ cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *de
 	dev_info->max_event_port_enqueue_depth = UINT32_MAX;
 }
 
+static int
+cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int rc;
+
+	rc = cnxk_sso_dev_validate(event_dev, 1, UINT32_MAX);
+	if (rc < 0) {
+		plt_err("Invalid event device configuration");
+		return -EINVAL;
+	}
+
+	rc = cn20k_sso_rsrc_init(dev, dev->nb_event_ports, dev->nb_event_queues);
+	if (rc < 0) {
+		plt_err("Failed to initialize SSO resources");
+		return -ENODEV;
+	}
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
+	.dev_configure = cn20k_sso_dev_configure,
+
+	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 05/22] event/cnxk: add CN20k event queue configuration
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (2 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 04/22] event/cnxk: add CN20K device config pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
                                 ` (16 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add setup and release functions for event queues i.e. SSO HWGRPs.
Allocate buffers in DRAM that hold inflight events.
Register device args to modify inflight event buffer count,
HWGRP QoS and stash.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c |  2 +-
 drivers/event/cnxk/cn20k_eventdev.c | 14 ++++++++++++++
 drivers/event/cnxk/cnxk_eventdev.c  |  4 ++--
 drivers/event/cnxk/cnxk_eventdev.h  |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index c7af0fac11..49805dd91d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -1251,7 +1251,7 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn10k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_FORCE_BP "=1"
-			      CN10K_SSO_STASH "=<string>"
+			      CNXK_SSO_STASH "=<string>"
 			      CNXK_TIM_DISABLE_NPA "=1"
 			      CNXK_TIM_CHNK_SLOTS "=<int>"
 			      CNXK_TIM_RINGS_LMT "=<int>"
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 753a976cd3..b876c36806 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -56,6 +56,12 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 		return -ENODEV;
 	}
 
+	rc = cnxk_sso_xaq_allocate(dev);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+cnxk_rsrc_fini:
+	roc_sso_rsrc_fini(&dev->sso);
 	return rc;
 }
 
@@ -64,6 +70,10 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_configure = cn20k_sso_dev_configure,
 
 	.queue_def_conf = cnxk_sso_queue_def_conf,
+	.queue_setup = cnxk_sso_queue_setup,
+	.queue_release = cnxk_sso_queue_release,
+	.queue_attr_set = cnxk_sso_queue_attribute_set,
+
 	.port_def_conf = cnxk_sso_port_def_conf,
 };
 
@@ -127,3 +137,7 @@ static struct rte_pci_driver cn20k_pci_sso = {
 RTE_PMD_REGISTER_PCI(event_cn20k, cn20k_pci_sso);
 RTE_PMD_REGISTER_PCI_TABLE(event_cn20k, cn20k_pci_sso_map);
 RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
+			      CNXK_SSO_XAE_CNT "=<int>"
+			      CNXK_SSO_GGRP_QOS "=<string>"
+			      CNXK_SSO_STASH "=<string>");
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index ab7420ab79..be6a487b59 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -624,8 +624,8 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, struct rte_devargs *devargs)
 			   &dev->force_ena_bp);
 	rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag,
 			   &single_ws);
-	rte_kvargs_process(kvlist, CN10K_SSO_STASH,
-			   &parse_sso_kvargs_stash_dict, dev);
+	rte_kvargs_process(kvlist, CNXK_SSO_STASH, &parse_sso_kvargs_stash_dict,
+			   dev);
 	dev->dual_ws = !single_ws;
 	rte_kvargs_free(kvlist);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 904a9b022d..ba08fa2173 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -27,7 +27,7 @@
 #define CNXK_SSO_GGRP_QOS  "qos"
 #define CNXK_SSO_FORCE_BP  "force_rx_bp"
 #define CN9K_SSO_SINGLE_WS "single_ws"
-#define CN10K_SSO_STASH	   "stash"
+#define CNXK_SSO_STASH	   "stash"
 
 #define CNXK_SSO_MAX_PROFILES 2
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 06/22] event/cnxk: add CN20K event port configuration
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (3 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
                                 ` (15 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add SSO HWS a.k.a event port setup, release, link, unlink
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c         |  63 ++-----
 drivers/event/cnxk/cn20k_eventdev.c         | 174 ++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h         |  26 +++
 drivers/event/cnxk/cnxk_common.h            |  55 +++++++
 drivers/event/cnxk/cnxk_eventdev.h          |   6 +-
 drivers/event/cnxk/cnxk_eventdev_selftest.c |   6 +-
 6 files changed, 276 insertions(+), 54 deletions(-)
 create mode 100644 drivers/event/cnxk/cn20k_eventdev.h
 create mode 100644 drivers/event/cnxk/cnxk_common.h

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 49805dd91d..43bc6c0bac 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -2,15 +2,16 @@
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_dmadev_pmd.h>
+
+#include "cn10k_cryptodev_ops.h"
+#include "cn10k_ethdev.h"
 #include "cn10k_tx_worker.h"
 #include "cn10k_worker.h"
-#include "cn10k_ethdev.h"
-#include "cn10k_cryptodev_ops.h"
+#include "cnxk_common.h"
+#include "cnxk_dma_event_dp.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
-#include "cnxk_dma_event_dp.h"
-
-#include <rte_dmadev_pmd.h>
 
 #define CN10K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                           \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
@@ -18,29 +19,6 @@
 #define CN10K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                           \
 	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
 
-static uint32_t
-cn10k_sso_gw_mode_wdata(struct cnxk_sso_evdev *dev)
-{
-	uint32_t wdata = 1;
-
-	if (dev->deq_tmo_ns)
-		wdata |= BIT(16);
-
-	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_NONE:
-	default:
-		break;
-	case CN10K_GW_MODE_PREF:
-		wdata |= BIT(19);
-		break;
-	case CN10K_GW_MODE_PREF_WFE:
-		wdata |= BIT(20) | BIT(19);
-		break;
-	}
-
-	return wdata;
-}
-
 static void *
 cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -61,7 +39,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
 	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
 	ws->hws_id = port_id;
 	ws->swtag_req = 0;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 	ws->gw_rdata = SSO_TT_EMPTY << 32;
 	ws->lmt_base = dev->sso.lmt_base;
 	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
@@ -99,7 +77,7 @@ cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->xaq_lmt = dev->xaq_lmt;
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = ws->lmt_base;
-	ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -220,12 +198,12 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
 
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
@@ -504,18 +482,7 @@ cn10k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
-	switch (event_dev->data->dev_conf.preschedule_type) {
-	default:
-	case RTE_EVENT_PRESCHEDULE_NONE:
-		dev->gw_mode = CN10K_GW_MODE_NONE;
-		break;
-	case RTE_EVENT_PRESCHEDULE:
-		dev->gw_mode = CN10K_GW_MODE_PREF;
-		break;
-	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
-		break;
-	}
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
 
 	rc = cnxk_setup_event_ports(event_dev, cn10k_sso_init_hws_mem,
 				    cn10k_sso_hws_setup);
@@ -598,13 +565,13 @@ cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	/* Check if we have work in PRF_WQE0, if so extract it. */
 	switch (dev->gw_mode) {
-	case CN10K_GW_MODE_PREF:
-	case CN10K_GW_MODE_PREF_WFE:
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
 		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
 		       BIT_ULL(63))
 			;
 		break;
-	case CN10K_GW_MODE_NONE:
+	case CNXK_GW_MODE_NONE:
 	default:
 		break;
 	}
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index b876c36806..611906a4f0 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,7 +4,87 @@
 
 #include "roc_api.h"
 
+#include "cn20k_eventdev.h"
+#include "cnxk_common.h"
 #include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static void *
+cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws;
+
+	/* Allocate event port memory */
+	ws = rte_zmalloc("cn20k_ws", sizeof(struct cn20k_sso_hws) + RTE_CACHE_LINE_SIZE,
+			 RTE_CACHE_LINE_SIZE);
+	if (ws == NULL) {
+		plt_err("Failed to alloc memory for port=%d", port_id);
+		return NULL;
+	}
+
+	/* First cache line is reserved for cookie */
+	ws = (struct cn20k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
+	ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+	ws->hws_id = port_id;
+	ws->swtag_req = 0;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->gw_rdata = SSO_TT_EMPTY << 32;
+	ws->xae_waes = dev->sso.feat.xaq_wq_entries;
+
+	return ws;
+}
+
+static int
+cn20k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_link(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static int
+cn20k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = port;
+
+	return roc_sso_hws_unlink(&dev->sso, ws->hws_id, map, nb_link, profile, 0);
+}
+
+static void
+cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t val;
+
+	ws->grp_base = grp_base;
+	ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+	ws->xaq_lmt = dev->xaq_lmt;
+	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
+	ws->aw_lmt = dev->sso.lmt_base;
+	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+
+	/* Set get_work timeout for HWS */
+	val = NSEC2USEC(dev->deq_tmo_ns);
+	val = val ? val - 1 : 0;
+	plt_write64(val, ws->base + SSOW_LF_GWS_NW_TIM);
+}
+
+static void
+cn20k_sso_hws_release(void *arg, void *hws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cn20k_sso_hws *ws = hws;
+	uint16_t i, j;
+
+	for (i = 0; i < CNXK_SSO_MAX_PROFILES; i++)
+		for (j = 0; j < dev->nb_event_queues; j++)
+			roc_sso_hws_unlink(&dev->sso, ws->hws_id, &j, 1, i, 0);
+	memset(ws, 0, sizeof(*ws));
+}
 
 static void
 cn20k_sso_set_rsrc(void *arg)
@@ -60,11 +140,98 @@ cn20k_sso_dev_configure(const struct rte_eventdev *event_dev)
 	if (rc < 0)
 		goto cnxk_rsrc_fini;
 
+	dev->gw_mode = cnxk_sso_hws_preschedule_get(event_dev->data->dev_conf.preschedule_type);
+
+	rc = cnxk_setup_event_ports(event_dev, cn20k_sso_init_hws_mem, cn20k_sso_hws_setup);
+	if (rc < 0)
+		goto cnxk_rsrc_fini;
+
+	/* Restore any prior port-queue mapping. */
+	cnxk_sso_restore_links(event_dev, cn20k_sso_hws_link);
+
+	dev->configured = 1;
+	rte_mb();
+
+	return 0;
 cnxk_rsrc_fini:
 	roc_sso_rsrc_fini(&dev->sso);
+	dev->nb_event_ports = 0;
 	return rc;
 }
 
+static int
+cn20k_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
+		     const struct rte_event_port_conf *port_conf)
+{
+
+	RTE_SET_USED(port_conf);
+	return cnxk_sso_port_setup(event_dev, port_id, cn20k_sso_hws_setup);
+}
+
+static void
+cn20k_sso_port_release(void *port)
+{
+	struct cnxk_sso_hws_cookie *gws_cookie = cnxk_sso_hws_get_cookie(port);
+	struct cnxk_sso_evdev *dev;
+
+	if (port == NULL)
+		return;
+
+	dev = cnxk_sso_pmd_priv(gws_cookie->event_dev);
+	if (!gws_cookie->configured)
+		goto free;
+
+	cn20k_sso_hws_release(dev, port);
+	memset(gws_cookie, 0, sizeof(*gws_cookie));
+free:
+	rte_free(gws_cookie);
+}
+
+static int
+cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_links];
+	uint16_t link;
+
+	RTE_SET_USED(priorities);
+	for (link = 0; link < nb_links; link++)
+		hwgrp_ids[link] = queues[link];
+	nb_links = cn20k_sso_hws_link(dev, port, hwgrp_ids, nb_links, profile);
+
+	return (int)nb_links;
+}
+
+static int
+cn20k_sso_port_unlink_profile(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+			      uint16_t nb_unlinks, uint8_t profile)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t hwgrp_ids[nb_unlinks];
+	uint16_t unlink;
+
+	for (unlink = 0; unlink < nb_unlinks; unlink++)
+		hwgrp_ids[unlink] = queues[unlink];
+	nb_unlinks = cn20k_sso_hws_unlink(dev, port, hwgrp_ids, nb_unlinks, profile);
+
+	return (int)nb_unlinks;
+}
+
+static int
+cn20k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
+		    const uint8_t priorities[], uint16_t nb_links)
+{
+	return cn20k_sso_port_link_profile(event_dev, port, queues, priorities, nb_links, 0);
+}
+
+static int
+cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[],
+		      uint16_t nb_unlinks)
+{
+	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -75,6 +242,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.queue_attr_set = cnxk_sso_queue_attribute_set,
 
 	.port_def_conf = cnxk_sso_port_def_conf,
+	.port_setup = cn20k_sso_port_setup,
+	.port_release = cn20k_sso_port_release,
+	.port_link = cn20k_sso_port_link,
+	.port_unlink = cn20k_sso_port_unlink,
+	.port_link_profile = cn20k_sso_port_link_profile,
+	.port_unlink_profile = cn20k_sso_port_unlink_profile,
+	.timeout_ticks = cnxk_sso_timeout_ticks,
 };
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
new file mode 100644
index 0000000000..5b6c558d5a
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef __CN20K_EVENTDEV_H__
+#define __CN20K_EVENTDEV_H__
+
+#define CN20K_SSO_DEFAULT_STASH_OFFSET -1
+#define CN20K_SSO_DEFAULT_STASH_LENGTH 2
+
+struct __rte_cache_aligned cn20k_sso_hws {
+	uint64_t base;
+	uint32_t gw_wdata;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+	/* Add Work Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
+	int64_t __rte_atomic *fc_cache_space;
+	uintptr_t aw_lmt;
+	uintptr_t grp_base;
+	uint16_t xae_waes;
+	int32_t xaq_lmt;
+};
+
+#endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
new file mode 100644
index 0000000000..712d82bee7
--- /dev/null
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CNXK_COMMON_H__
+#define __CNXK_COMMON_H__
+
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+static uint32_t
+cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
+{
+	uint32_t wdata = 1;
+
+	if (dev->deq_tmo_ns)
+		wdata |= BIT(16);
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	case CNXK_GW_MODE_PREF:
+		wdata |= BIT(19);
+		break;
+	case CNXK_GW_MODE_PREF_WFE:
+		wdata |= BIT(20) | BIT(19);
+		break;
+	}
+
+	return wdata;
+}
+
+static uint8_t
+cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
+{
+	uint8_t gw_mode = 0;
+
+	switch (preschedule_type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		gw_mode = CNXK_GW_MODE_NONE;
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		gw_mode = CNXK_GW_MODE_PREF;
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		gw_mode = CNXK_GW_MODE_PREF_WFE;
+		break;
+	}
+
+	return gw_mode;
+}
+
+#endif /* __CNXK_COMMON_H__ */
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ba08fa2173..4066497e6b 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -38,9 +38,9 @@
 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
 #define CN9K_DUAL_WS_NB_WS	     2
 
-#define CN10K_GW_MODE_NONE     0
-#define CN10K_GW_MODE_PREF     1
-#define CN10K_GW_MODE_PREF_WFE 2
+#define CNXK_GW_MODE_NONE     0
+#define CNXK_GW_MODE_PREF     1
+#define CNXK_GW_MODE_PREF_WFE 2
 
 #define CNXK_QOS_NORMALIZE(val, min, max, cnt)                                 \
 	(min + val / ((max + cnt - 1) / cnt))
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 311de3d92b..7a3262bcff 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1568,15 +1568,15 @@ cnxk_sso_selftest(const char *dev_name)
 
 	if (roc_model_runtime_is_cn10k()) {
 		printf("Verifying CN10K workslot getwork mode none\n");
-		dev->gw_mode = CN10K_GW_MODE_NONE;
+		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF;
+		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
-		dev->gw_mode = CN10K_GW_MODE_PREF_WFE;
+		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 07/22] event/cnxk: add CN20K SSO enqueue fast path
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (4 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
                                 ` (14 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton, Anatoly Burakov; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS fastpath event device enqueue functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  20 +-
 drivers/event/cnxk/cn20k_worker.c   | 384 ++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  21 ++
 drivers/event/cnxk/meson.build      |   1 +
 4 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 drivers/event/cnxk/cn20k_worker.c
 create mode 100644 drivers/event/cnxk/cn20k_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 611906a4f0..a5dd03de6e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -5,6 +5,7 @@
 #include "roc_api.h"
 
 #include "cn20k_eventdev.h"
+#include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -108,6 +109,21 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+
+static void
+cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(RTE_ARCH_ARM64)
+
+	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
+	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
+	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
 static void
 cn20k_sso_info_get(struct rte_eventdev *event_dev, struct rte_event_dev_info *dev_info)
 {
@@ -265,8 +281,10 @@ cn20k_sso_init(struct rte_eventdev *event_dev)
 
 	event_dev->dev_ops = &cn20k_sso_dev_ops;
 	/* For secondary processes, the primary has done all the work */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		cn20k_sso_fp_fns_set(event_dev);
 		return 0;
+	}
 
 	rc = cnxk_sso_init(event_dev);
 	if (rc < 0)
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
new file mode 100644
index 0000000000..c7de493681
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#include <rte_vect.h>
+
+#include "roc_api.h"
+
+#include "cn20k_worker.h"
+#include "cnxk_eventdev.h"
+#include "cnxk_worker.h"
+
+/* SSO Operations */
+
+static __rte_always_inline uint8_t
+cn20k_sso_hws_new_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint64_t event_ptr = ev->u64;
+	const uint16_t grp = ev->queue_id;
+
+	rte_atomic_thread_fence(rte_memory_order_acq_rel);
+	if (ws->xaq_lmt <= *ws->fc_mem)
+		return 0;
+
+	cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grp_base + (grp << 12));
+	return 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_swtag(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
+
+	/* CNXK model
+	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
+	 *
+	 * SSO_TT_ORDERED        norm           norm             untag
+	 * SSO_TT_ATOMIC         norm           norm		   untag
+	 * SSO_TT_UNTAGGED       norm           norm             NOOP
+	 */
+
+	if (new_tt == SSO_TT_UNTAGGED) {
+		if (cur_tt != SSO_TT_UNTAGGED)
+			cnxk_sso_hws_swtag_untag(ws->base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+	} else {
+		cnxk_sso_hws_swtag_norm(tag, new_tt, ws->base + SSOW_LF_GWS_OP_SWTAG_NORM);
+	}
+	ws->swtag_req = 1;
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_fwd_group(struct cn20k_sso_hws *ws, const struct rte_event *ev, const uint16_t grp)
+{
+	const uint32_t tag = (uint32_t)ev->event;
+	const uint8_t new_tt = ev->sched_type;
+
+	plt_write64(ev->u64, ws->base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+	cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
+}
+
+static __rte_always_inline void
+cn20k_sso_hws_forward_event(struct cn20k_sso_hws *ws, const struct rte_event *ev)
+{
+	const uint8_t grp = ev->queue_id;
+
+	/* Group hasn't changed, Use SWTAG to forward the event */
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
+		cn20k_sso_hws_fwd_swtag(ws, ev);
+	else
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
+		cn20k_sso_hws_fwd_group(ws, ev, grp);
+}
+
+static inline int32_t
+sso_read_xaq_space(struct cn20k_sso_hws *ws)
+{
+	return (ws->xaq_lmt - rte_atomic_load_explicit(ws->fc_mem, rte_memory_order_relaxed)) *
+	       ws->xae_waes;
+}
+
+static inline void
+sso_lmt_aw_wait_fc(struct cn20k_sso_hws *ws, int64_t req)
+{
+	int64_t cached, refill;
+
+retry:
+	while (rte_atomic_load_explicit(ws->fc_cache_space, rte_memory_order_relaxed) < 0)
+		;
+
+	cached = rte_atomic_fetch_sub_explicit(ws->fc_cache_space, req, rte_memory_order_acquire) -
+		 req;
+	/* Check if there is enough space, else update and retry. */
+	if (cached < 0) {
+		/* Check if we have space else retry. */
+		do {
+			refill = sso_read_xaq_space(ws);
+		} while (refill <= 0);
+		rte_atomic_compare_exchange_strong_explicit(ws->fc_cache_space, &cached, refill,
+							    rte_memory_order_release,
+							    rte_memory_order_relaxed);
+
+		goto retry;
+	}
+}
+
+#define VECTOR_SIZE_BITS	     0xFFFFFFFFFFF80000ULL
+#define VECTOR_GET_LINE_OFFSET(line) (19 + (3 * line))
+
+static uint64_t
+vector_size_partial_mask(uint16_t off, uint16_t cnt)
+{
+	return (VECTOR_SIZE_BITS & ~(~0x0ULL << off)) | ((uint64_t)(cnt - 1) << off);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_new_event_lmtst(struct cn20k_sso_hws *ws, uint8_t queue_id,
+			      const struct rte_event ev[], uint16_t n)
+{
+	uint16_t lines, partial_line, burst, left;
+	uint64_t wdata[2], pa[2] = {0};
+	uintptr_t lmt_addr;
+	uint16_t sz0, sz1;
+	uint16_t lmt_id;
+
+	sz0 = sz1 = 0;
+	lmt_addr = ws->aw_lmt;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	left = n;
+again:
+	burst = RTE_MIN(BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2 + ROC_LMT_LINES_PER_CORE_LOG2), left);
+
+	/* Set wdata */
+	lines = burst >> ROC_SSO_AW_PER_LMT_LINE_LOG2;
+	partial_line = burst & (BIT(ROC_SSO_AW_PER_LMT_LINE_LOG2) - 1);
+	wdata[0] = wdata[1] = 0;
+	if (lines > BIT(ROC_LMT_LINES_PER_STR_LOG2)) {
+		wdata[0] = lmt_id;
+		wdata[0] |= 15ULL << 12;
+		wdata[0] |= VECTOR_SIZE_BITS;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = 16 << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+
+		wdata[1] = lmt_id + 16;
+		pa[1] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+
+		lines -= 17;
+		wdata[1] |= partial_line ? (uint64_t)(lines + 1) << 12 : (uint64_t)(lines << 12);
+		wdata[1] |= partial_line ? vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								    partial_line) :
+					   VECTOR_SIZE_BITS;
+		sz1 = burst - sz0;
+		partial_line = 0;
+	} else if (lines) {
+		/* We need to handle two cases here:
+		 * 1. Partial line spill over to wdata[1] i.e. lines == 16
+		 * 2. Partial line with spill lines < 16.
+		 */
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) | (0x7 << 4);
+		sz0 = lines << ROC_SSO_AW_PER_LMT_LINE_LOG2;
+		if (lines == 16) {
+			wdata[0] |= 15ULL << 12;
+			wdata[0] |= VECTOR_SIZE_BITS;
+			if (partial_line) {
+				wdata[1] = lmt_id + 16;
+				pa[1] = (ws->grp_base + (queue_id << 12) +
+					 SSO_LF_GGRP_OP_AW_LMTST) |
+					((partial_line - 1) << 4);
+			}
+		} else {
+			lines -= 1;
+			wdata[0] |= partial_line ? (uint64_t)(lines + 1) << 12 :
+						   (uint64_t)(lines << 12);
+			wdata[0] |= partial_line ?
+					    vector_size_partial_mask(VECTOR_GET_LINE_OFFSET(lines),
+								     partial_line) :
+					    VECTOR_SIZE_BITS;
+			sz0 += partial_line;
+		}
+		sz1 = burst - sz0;
+		partial_line = 0;
+	}
+
+	/* Only partial lines */
+	if (partial_line) {
+		wdata[0] = lmt_id;
+		pa[0] = (ws->grp_base + (queue_id << 12) + SSO_LF_GGRP_OP_AW_LMTST) |
+			((partial_line - 1) << 4);
+		sz0 = partial_line;
+		sz1 = burst - sz0;
+	}
+
+#if defined(RTE_ARCH_ARM64)
+	uint64x2_t aw_mask = {0xC0FFFFFFFFULL, ~0x0ULL};
+	uint64x2_t tt_mask = {0x300000000ULL, 0};
+	uint16_t parts;
+
+	while (burst) {
+		parts = burst > 7 ? 8 : plt_align32prevpow2(burst);
+		burst -= parts;
+		/* Lets try to fill at least one line per burst. */
+		switch (parts) {
+		case 8: {
+			uint64x2_t aw0, aw1, aw2, aw3, aw4, aw5, aw6, aw7;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+			aw4 = vandq_u64(vld1q_u64((const uint64_t *)&ev[4]), aw_mask);
+			aw5 = vandq_u64(vld1q_u64((const uint64_t *)&ev[5]), aw_mask);
+			aw6 = vandq_u64(vld1q_u64((const uint64_t *)&ev[6]), aw_mask);
+			aw7 = vandq_u64(vld1q_u64((const uint64_t *)&ev[7]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+			aw4 = vorrq_u64(vandq_u64(vshrq_n_u64(aw4, 6), tt_mask), aw4);
+			aw5 = vorrq_u64(vandq_u64(vshrq_n_u64(aw5, 6), tt_mask), aw5);
+			aw6 = vorrq_u64(vandq_u64(vshrq_n_u64(aw6, 6), tt_mask), aw6);
+			aw7 = vorrq_u64(vandq_u64(vshrq_n_u64(aw7, 6), tt_mask), aw7);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 64), aw4);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 80), aw5);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 96), aw6);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 112), aw7);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 128);
+		} break;
+		case 4: {
+			uint64x2_t aw0, aw1, aw2, aw3;
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+			aw2 = vandq_u64(vld1q_u64((const uint64_t *)&ev[2]), aw_mask);
+			aw3 = vandq_u64(vld1q_u64((const uint64_t *)&ev[3]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+			aw2 = vorrq_u64(vandq_u64(vshrq_n_u64(aw2, 6), tt_mask), aw2);
+			aw3 = vorrq_u64(vandq_u64(vshrq_n_u64(aw3, 6), tt_mask), aw3);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 32), aw2);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 48), aw3);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 64);
+		} break;
+		case 2: {
+			uint64x2_t aw0, aw1;
+
+			aw0 = vandq_u64(vld1q_u64((const uint64_t *)&ev[0]), aw_mask);
+			aw1 = vandq_u64(vld1q_u64((const uint64_t *)&ev[1]), aw_mask);
+
+			aw0 = vorrq_u64(vandq_u64(vshrq_n_u64(aw0, 6), tt_mask), aw0);
+			aw1 = vorrq_u64(vandq_u64(vshrq_n_u64(aw1, 6), tt_mask), aw1);
+
+			vst1q_u64((void *)lmt_addr, aw0);
+			vst1q_u64((void *)PLT_PTR_ADD(lmt_addr, 16), aw1);
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 32);
+		} break;
+		case 1: {
+			__uint128_t aw0;
+
+			aw0 = ev[0].u64;
+			aw0 <<= 64;
+			aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+			aw0 |= (uint64_t)ev[0].sched_type << 32;
+
+			*((__uint128_t *)lmt_addr) = aw0;
+			lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+		} break;
+		}
+		ev += parts;
+	}
+#else
+	uint16_t i;
+
+	for (i = 0; i < burst; i++) {
+		__uint128_t aw0;
+
+		aw0 = ev[0].u64;
+		aw0 <<= 64;
+		aw0 |= ev[0].event & (BIT_ULL(32) - 1);
+		aw0 |= (uint64_t)ev[0].sched_type << 32;
+		*((__uint128_t *)lmt_addr) = aw0;
+		lmt_addr = (uintptr_t)PLT_PTR_ADD(lmt_addr, 16);
+	}
+#endif
+
+	/* wdata[0] will be always valid */
+	sso_lmt_aw_wait_fc(ws, sz0);
+	roc_lmt_submit_steorl(wdata[0], pa[0]);
+	if (wdata[1]) {
+		sso_lmt_aw_wait_fc(ws, sz1);
+		roc_lmt_submit_steorl(wdata[1], pa[1]);
+	}
+
+	left -= (sz0 + sz1);
+	if (left)
+		goto again;
+
+	return n;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	switch (ev->op) {
+	case RTE_EVENT_OP_NEW:
+		return cn20k_sso_hws_new_event(ws, ev);
+	case RTE_EVENT_OP_FORWARD:
+		cn20k_sso_hws_forward_event(ws, ev);
+		break;
+	case RTE_EVENT_OP_RELEASE:
+		if (ws->swtag_req) {
+			cnxk_sso_hws_desched(ev->u64, ws->base);
+			ws->swtag_req = 0;
+			break;
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	uint16_t idx = 0, done = 0, rc = 0;
+	struct cn20k_sso_hws *ws = port;
+	uint8_t queue_id;
+	int32_t space;
+
+	/* Do a common back-pressure check and return */
+	space = sso_read_xaq_space(ws) - ws->xae_waes;
+	if (space <= 0)
+		return 0;
+	nb_events = space < nb_events ? space : nb_events;
+
+	do {
+		queue_id = ev[idx].queue_id;
+		for (idx = idx + 1; idx < nb_events; idx++)
+			if (queue_id != ev[idx].queue_id)
+				break;
+
+		rc = cn20k_sso_hws_new_event_lmtst(ws, queue_id, &ev[done], idx - done);
+		if (rc != (idx - done))
+			return rc + done;
+		done += rc;
+
+	} while (done < nb_events);
+
+	return done;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(nb_events);
+	cn20k_sso_hws_forward_event(ws, ev);
+
+	return 1;
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
new file mode 100644
index 0000000000..5ff8f11b38
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_WORKER_H__
+#define __CN20K_WORKER_H__
+
+#include <rte_eventdev.h>
+
+#include "cnxk_worker.h"
+#include "cn20k_eventdev.h"
+
+/* CN20K Fastpath functions. */
+uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
+					   uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
+					       uint16_t nb_events);
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 21cd5c5ae6..d0dc2320e1 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -232,6 +232,7 @@ endif
 if soc_type == 'cn20k' or soc_type == 'all'
 sources += files(
         'cn20k_eventdev.c',
+        'cn20k_worker.c',
 )
 endif
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 08/22] event/cnxk: add CN20K SSO dequeue fast path
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (5 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
                                 ` (13 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K SSO GWS event dequeue fastpath functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a5dd03de6e..d1668a00c1 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -114,11 +114,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks);
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1;
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events);
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws);
+	RTE_SET_USED(flags);
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff);
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+	gw.get_work = ws->gw_wdata;
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1];
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+	gw.u64[0] = x0;
+	gw.u64[1] = x1;
+#else
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif
+#endif
+#else
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
+
+/* Used in cleaning up workslot. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw;
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif
+
+	ws->gw_rdata = gw.u64[0];
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0];
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1];
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 09/22] event/cnxk: add CN20K event port quiesce
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (6 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
                                 ` (12 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port quiesce function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index d1668a00c1..56e3eb87fb 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -208,6 +208,65 @@ cn20k_sso_port_release(void *port)
 	rte_free(gws_cookie);
 }
 
+static void
+cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+		       rte_eventdev_port_flush_t flush_cb, void *args)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct cn20k_sso_hws *ws = port;
+	struct rte_event ev;
+	uint64_t ptag;
+	bool is_pend;
+
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+		is_pend = true;
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	if (is_pend && ev.u64)
+		if (flush_cb)
+			flush_cb(event_dev->data->dev_id, ev, args);
+	ptag = (plt_read64(ws->base + SSOW_LF_GWS_TAG) >> 32) & SSO_TT_EMPTY;
+	if (ptag != SSO_TT_EMPTY)
+		cnxk_sso_hws_swtag_flush(ws->base);
+
+	do {
+		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+	} while (ptag & BIT_ULL(56));
+
+	/* Check if we have work in PRF_WQE0, if so extract it. */
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (ev.u64) {
+			if (flush_cb)
+				flush_cb(event_dev->data->dev_id, ev, args);
+		}
+		cnxk_sso_hws_swtag_flush(ws->base);
+		do {
+			ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (ptag & BIT_ULL(56));
+	}
+	ws->swtag_req = 0;
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn20k_sso_port_link_profile(struct rte_eventdev *event_dev, void *port, const uint8_t queues[],
 			    const uint8_t priorities[], uint16_t nb_links, uint8_t profile)
@@ -265,6 +324,7 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_def_conf = cnxk_sso_port_def_conf,
 	.port_setup = cn20k_sso_port_setup,
 	.port_release = cn20k_sso_port_release,
+	.port_quiesce = cn20k_sso_port_quiesce,
 	.port_link = cn20k_sso_port_link,
 	.port_unlink = cn20k_sso_port_unlink,
 	.port_link_profile = cn20k_sso_port_link_profile,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 10/22] event/cnxk: add CN20K event port profile switch
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (7 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
                                 ` (11 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port profile switch.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |  1 +
 drivers/event/cnxk/cn20k_worker.c   | 11 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 56e3eb87fb..53b0b43199 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -124,6 +124,7 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 	if (dev->deq_tmo_ns)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
+	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2dcde0b444..2c723523d2 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -383,6 +383,17 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 	return 1;
 }
 
+int __rte_hot
+cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(0xFFUL);
+	ws->gw_wdata |= (profile + 1);
+
+	return 0;
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 8dc60a06ec..447f28f0f2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -145,6 +145,7 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 					       uint16_t nb_events);
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
+int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 11/22] event/cnxk: add CN20K event port preschedule
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (8 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 12/22] event/cnxk: add CN20K device start pbhagavatula
                                 ` (10 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K event port preschedule modify and preschedule
functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/ssow.h       |  1 +
 drivers/event/cnxk/cn20k_eventdev.c |  2 ++
 drivers/event/cnxk/cn20k_worker.c   | 30 +++++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_worker.h   |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index c146a8c3ef..ec6bd7896b 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -37,6 +37,7 @@
 #define SSOW_LF_GWS_PRF_WQE1	     (0x448ull) /* [CN10K, .) */
 #define SSOW_LF_GWS_OP_GET_WORK0     (0x600ull)
 #define SSOW_LF_GWS_OP_GET_WORK1     (0x608ull) /* [CN10K, .) */
+#define SSOW_LF_GWS_OP_PRF_GETWORK   (0x610ull) /* [CN20K, .) */
 #define SSOW_LF_GWS_OP_SWTAG_FLUSH   (0x800ull)
 #define SSOW_LF_GWS_OP_SWTAG_UNTAG   (0x810ull)
 #define SSOW_LF_GWS_OP_SWTP_CLR	     (0x820ull)
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 53b0b43199..a788eeed63 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -125,6 +125,8 @@ cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
 
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
+	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
+	event_dev->preschedule = cn20k_sso_hws_preschedule;
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index 2c723523d2..ebfe863bc5 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -394,6 +394,36 @@ cn20k_sso_hws_profile_switch(void *port, uint8_t profile)
 	return 0;
 }
 
+int __rte_hot
+cn20k_sso_hws_preschedule_modify(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	ws->gw_wdata &= ~(BIT(19) | BIT(20));
+	switch (type) {
+	default:
+	case RTE_EVENT_PRESCHEDULE_NONE:
+		break;
+	case RTE_EVENT_PRESCHEDULE:
+		ws->gw_wdata |= BIT(19);
+		break;
+	case RTE_EVENT_PRESCHEDULE_ADAPTIVE:
+		ws->gw_wdata |= BIT(19) | BIT(20);
+		break;
+	}
+
+	return 0;
+}
+
+void __rte_hot
+cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(type);
+	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
+}
+
 uint16_t __rte_hot
 cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
 {
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 447f28f0f2..dd8b72bc53 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -146,6 +146,9 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 int __rte_hot cn20k_sso_hws_profile_switch(void *port, uint8_t profile);
+int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
+					       enum rte_event_dev_preschedule_type type);
+void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
 uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
 uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 12/22] event/cnxk: add CN20K device start
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (9 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
                                 ` (9 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add CN20K start function along with few cleanup API's to maintain
sanity.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 103 +--------------------------
 drivers/event/cnxk/cn20k_eventdev.c |  76 ++++++++++++++++++++
 drivers/event/cnxk/cnxk_common.h    | 104 ++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 100 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 43bc6c0bac..f2e591f547 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -154,83 +154,6 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 	return 0;
 }
 
-static void
-cn10k_sso_hws_reset(void *arg, void *hws)
-{
-	struct cnxk_sso_evdev *dev = arg;
-	struct cn10k_sso_hws *ws = hws;
-	uintptr_t base = ws->base;
-	uint64_t pend_state;
-	union {
-		__uint128_t wdata;
-		uint64_t u64[2];
-	} gw;
-	uint8_t pend_tt;
-	bool is_pend;
-
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
-	/* Wait till getwork/swtp/waitw/desched completes. */
-	is_pend = false;
-	/* Work in WQE0 is always consumed, unless its a SWTAG. */
-	pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
-	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
-	    ws->swtag_req)
-		is_pend = true;
-
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
-			       BIT_ULL(56) | BIT_ULL(54)));
-	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
-			cnxk_sso_hws_swtag_untag(base +
-						 SSOW_LF_GWS_OP_SWTAG_UNTAG);
-		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-	} else if (pend_tt != SSO_TT_EMPTY) {
-		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
-	}
-
-	/* Wait for desched to complete. */
-	do {
-		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
-	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
-
-	switch (dev->gw_mode) {
-	case CNXK_GW_MODE_PREF:
-	case CNXK_GW_MODE_PREF_WFE:
-		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
-			;
-		break;
-	case CNXK_GW_MODE_NONE:
-	default:
-		break;
-	}
-
-	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) !=
-	    SSO_TT_EMPTY) {
-		plt_write64(BIT_ULL(16) | 1,
-			    ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		do {
-			roc_load_pair(gw.u64[0], gw.u64[1],
-				      ws->base + SSOW_LF_GWS_WQE0);
-		} while (gw.u64[0] & BIT_ULL(63));
-		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
-			if (pend_tt == SSO_TT_ATOMIC ||
-			    pend_tt == SSO_TT_ORDERED)
-				cnxk_sso_hws_swtag_untag(
-					base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
-			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
-		}
-	}
-
-	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
-	rte_mb();
-}
-
 static void
 cn10k_sso_set_rsrc(void *arg)
 {
@@ -640,24 +563,6 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn10k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
-static void
-cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev)
-{
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
-	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
-	int i, rc;
-
-	plt_sso_dbg();
-	for (i = 0; i < dev->stash_cnt; i++) {
-		stash[i].hwgrp = dev->stash_parse_data[i].queue;
-		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
-		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
-	}
-	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
-	if (rc < 0)
-		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
-}
-
 static int
 cn10k_sso_start(struct rte_eventdev *event_dev)
 {
@@ -669,9 +574,8 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 	if (rc < 0)
 		return rc;
 
-	cn10k_sso_configure_queue_stash(event_dev);
-	rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
-			    cn10k_sso_hws_flush_events);
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 	if (rc < 0)
 		return rc;
 	cn10k_sso_fp_fns_set(event_dev);
@@ -692,8 +596,7 @@ cn10k_sso_stop(struct rte_eventdev *event_dev)
 	for (i = 0; i < event_dev->data->nb_ports; i++)
 		hws[i] = i;
 	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
-	cnxk_sso_stop(event_dev, cn10k_sso_hws_reset,
-		      cn10k_sso_hws_flush_events);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn10k_sso_hws_flush_events);
 }
 
 static int
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a788eeed63..69c593ed60 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -87,6 +87,61 @@ cn20k_sso_hws_release(void *arg, void *hws)
 	memset(ws, 0, sizeof(*ws));
 }
 
+static int
+cn20k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, cnxk_handle_event_t fn,
+			   void *arg)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
+	uint64_t retry = CNXK_SSO_FLUSH_RETRY_MAX;
+	struct cn20k_sso_hws *ws = hws;
+	uint64_t cq_ds_cnt = 1;
+	uint64_t aq_cnt = 1;
+	uint64_t ds_cnt = 1;
+	struct rte_event ev;
+	uint64_t val, req;
+
+	plt_write64(0, base + SSO_LF_GGRP_QCTL);
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	req = queue_id;	    /* GGRP ID */
+	req |= BIT_ULL(18); /* Grouped */
+	req |= BIT_ULL(16); /* WAIT */
+
+	aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+	ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+	cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+	cq_ds_cnt &= 0x3FFF3FFF0000;
+
+	while (aq_cnt || cq_ds_cnt || ds_cnt) {
+		plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		if (fn != NULL && ev.u64 != 0)
+			fn(arg, ev);
+		if (ev.sched_type != SSO_TT_EMPTY)
+			cnxk_sso_hws_swtag_flush(ws->base);
+		else if (retry-- == 0)
+			break;
+		do {
+			val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+		} while (val & BIT_ULL(56));
+		aq_cnt = plt_read64(base + SSO_LF_GGRP_AQ_CNT);
+		ds_cnt = plt_read64(base + SSO_LF_GGRP_MISC_CNT);
+		cq_ds_cnt = plt_read64(base + SSO_LF_GGRP_INT_CNT);
+		/* Extract cq and ds count */
+		cq_ds_cnt &= 0x3FFF3FFF0000;
+	}
+
+	if (aq_cnt || cq_ds_cnt || ds_cnt)
+		return -EAGAIN;
+
+	plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &ws->hws_id, 1);
+	rte_mb();
+
+	return 0;
+}
+
 static void
 cn20k_sso_set_rsrc(void *arg)
 {
@@ -315,6 +370,25 @@ cn20k_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues
 	return cn20k_sso_port_unlink_profile(event_dev, port, queues, nb_unlinks, 0);
 }
 
+static int
+cn20k_sso_start(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int rc, i;
+
+	cnxk_sso_configure_queue_stash(event_dev);
+	rc = cnxk_sso_start(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set(event_dev);
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+
+	return rc;
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -333,6 +407,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_link_profile = cn20k_sso_port_link_profile,
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
+
+	.dev_start = cn20k_sso_start,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_common.h b/drivers/event/cnxk/cnxk_common.h
index 712d82bee7..c361d0530d 100644
--- a/drivers/event/cnxk/cnxk_common.h
+++ b/drivers/event/cnxk/cnxk_common.h
@@ -8,6 +8,15 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+struct cnxk_sso_hws_prf {
+	uint64_t base;
+	uint32_t gw_wdata;
+	void *lookup_mem;
+	uint64_t gw_rdata;
+	uint8_t swtag_req;
+	uint8_t hws_id;
+};
+
 static uint32_t
 cnxk_sso_hws_prf_wdata(struct cnxk_sso_evdev *dev)
 {
@@ -52,4 +61,99 @@ cnxk_sso_hws_preschedule_get(uint8_t preschedule_type)
 	return gw_mode;
 }
 
+static void
+cnxk_sso_hws_reset(void *arg, void *ws)
+{
+	struct cnxk_sso_evdev *dev = arg;
+	struct cnxk_sso_hws_prf *ws_prf;
+	uint64_t pend_state;
+	uint8_t swtag_req;
+	uintptr_t base;
+	uint8_t hws_id;
+	union {
+		__uint128_t wdata;
+		uint64_t u64[2];
+	} gw;
+	uint8_t pend_tt;
+	bool is_pend;
+
+	ws_prf = ws;
+	base = ws_prf->base;
+	hws_id = ws_prf->hws_id;
+	swtag_req = ws_prf->swtag_req;
+
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	/* Wait till getwork/swtp/waitw/desched completes. */
+	is_pend = false;
+	/* Work in WQE0 is always consumed, unless its a SWTAG. */
+	pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || swtag_req)
+		is_pend = true;
+
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state &
+		 (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+	pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+	if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+		if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+			cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+		plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+	} else if (pend_tt != SSO_TT_EMPTY) {
+		plt_write64(0, base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+	}
+
+	/* Wait for desched to complete. */
+	do {
+		pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
+	} while (pend_state & (BIT_ULL(58) | BIT_ULL(56)));
+
+	switch (dev->gw_mode) {
+	case CNXK_GW_MODE_PREF:
+	case CNXK_GW_MODE_PREF_WFE:
+		while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
+			;
+		break;
+	case CNXK_GW_MODE_NONE:
+	default:
+		break;
+	}
+
+	if (CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
+		plt_write64(BIT_ULL(16) | 1, base + SSOW_LF_GWS_OP_GET_WORK0);
+		do {
+			roc_load_pair(gw.u64[0], gw.u64[1], base + SSOW_LF_GWS_WQE0);
+		} while (gw.u64[0] & BIT_ULL(63));
+		pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
+		if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+			if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
+				cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG);
+			plt_write64(0, base + SSOW_LF_GWS_OP_DESCHED);
+		}
+	}
+
+	plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
+	roc_sso_hws_gwc_invalidate(&dev->sso, &hws_id, 1);
+	rte_mb();
+}
+
+static void
+cnxk_sso_configure_queue_stash(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash[dev->stash_cnt];
+	int i, rc;
+
+	plt_sso_dbg();
+	for (i = 0; i < dev->stash_cnt; i++) {
+		stash[i].hwgrp = dev->stash_parse_data[i].queue;
+		stash[i].stash_offset = dev->stash_parse_data[i].stash_offset;
+		stash[i].stash_count = dev->stash_parse_data[i].stash_length;
+	}
+	rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt);
+	if (rc < 0)
+		plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+}
+
 #endif /* __CNXK_COMMON_H__ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 13/22] event/cnxk: add CN20K device stop and close
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (10 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 12/22] event/cnxk: add CN20K device start pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
                                 ` (8 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event device stop and close callback functions.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 69c593ed60..6195b29705 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -389,6 +389,25 @@ cn20k_sso_start(struct rte_eventdev *event_dev)
 	return rc;
 }
 
+static void
+cn20k_sso_stop(struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint8_t hws[RTE_EVENT_MAX_PORTS_PER_DEV];
+	int i;
+
+	for (i = 0; i < event_dev->data->nb_ports; i++)
+		hws[i] = i;
+	roc_sso_hws_gwc_invalidate(&dev->sso, hws, event_dev->data->nb_ports);
+	cnxk_sso_stop(event_dev, cnxk_sso_hws_reset, cn20k_sso_hws_flush_events);
+}
+
+static int
+cn20k_sso_close(struct rte_eventdev *event_dev)
+{
+	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -409,6 +428,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
 	.dev_start = cn20k_sso_start,
+	.dev_stop = cn20k_sso_stop,
+	.dev_close = cn20k_sso_close,
 };
 
 static int
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 14/22] event/cnxk: add CN20K xstats, selftest and dump
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (11 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
                                 ` (7 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add selftest to verify SSO, xstats to get queue specific
stats and add function to dump internal state of SSO.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test/test_eventdev.c                    |  7 +++++++
 drivers/event/cnxk/cn20k_eventdev.c         | 12 ++++++++++++
 drivers/event/cnxk/cnxk_eventdev_selftest.c |  8 ++++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index b03a62fe70..e97754bd47 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1521,6 +1521,12 @@ test_eventdev_selftest_cn10k(void)
 	return test_eventdev_selftest_impl("event_cn10k", "");
 }
 
+static int
+test_eventdev_selftest_cn20k(void)
+{
+	return test_eventdev_selftest_impl("event_cn20k", "");
+}
+
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
 REGISTER_FAST_TEST(eventdev_common_autotest, true, true, test_eventdev_common);
@@ -1532,5 +1538,6 @@ REGISTER_DRIVER_TEST(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2);
 REGISTER_DRIVER_TEST(eventdev_selftest_dlb2, test_eventdev_selftest_dlb2);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn9k, test_eventdev_selftest_cn9k);
 REGISTER_DRIVER_TEST(eventdev_selftest_cn10k, test_eventdev_selftest_cn10k);
+REGISTER_DRIVER_TEST(eventdev_selftest_cn20k, test_eventdev_selftest_cn20k);
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 6195b29705..793098bd61 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -408,6 +408,12 @@ cn20k_sso_close(struct rte_eventdev *event_dev)
 	return cnxk_sso_close(event_dev, cn20k_sso_hws_unlink);
 }
 
+static int
+cn20k_sso_selftest(void)
+{
+	return cnxk_sso_selftest(RTE_STR(event_cn20k));
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -427,9 +433,15 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.xstats_get = cnxk_sso_xstats_get,
+	.xstats_reset = cnxk_sso_xstats_reset,
+	.xstats_get_names = cnxk_sso_xstats_get_names,
+
+	.dump = cnxk_sso_dump,
 	.dev_start = cn20k_sso_start,
 	.dev_stop = cn20k_sso_stop,
 	.dev_close = cn20k_sso_close,
+	.dev_selftest = cn20k_sso_selftest,
 };
 
 static int
diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
index 7a3262bcff..8f3d0982e9 100644
--- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
+++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
@@ -1566,16 +1566,16 @@ cnxk_sso_selftest(const char *dev_name)
 			return rc;
 	}
 
-	if (roc_model_runtime_is_cn10k()) {
-		printf("Verifying CN10K workslot getwork mode none\n");
+	if (roc_model_runtime_is_cn10k() || roc_model_runtime_is_cn20k()) {
+		printf("Verifying %s workslot getwork mode none\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_NONE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode prefetch\n");
+		printf("Verifying %s workslot getwork mode prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-		printf("Verifying CN10K workslot getwork mode smart prefetch\n");
+		printf("Verifying %s workslot getwork mode smart prefetch\n", dev_name);
 		dev->gw_mode = CNXK_GW_MODE_PREF_WFE;
 		if (cnxk_sso_testsuite_run(dev_name))
 			return rc;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 15/22] event/cnxk: support CN20K Rx adapter
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (12 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
                                 ` (6 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for CN20K event eth Rx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c | 121 +++++++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_eventdev.h |   4 +
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 793098bd61..602fbd6359 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -4,6 +4,7 @@
 
 #include "roc_api.h"
 
+#include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
@@ -414,6 +415,117 @@ cn20k_sso_selftest(void)
 	return cnxk_sso_selftest(RTE_STR(event_cn20k));
 }
 
+static int
+cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
+			      const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+	int rc;
+
+	RTE_SET_USED(event_dev);
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 9);
+	if (rc)
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
+	else
+		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+
+	return 0;
+}
+
+static void
+cn20k_sso_set_priv_mem(const struct rte_eventdev *event_dev, void *lookup_mem)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		ws->xaq_lmt = dev->xaq_lmt;
+		ws->fc_mem = (int64_t __rte_atomic *)dev->fc_iova;
+		ws->tstamp = dev->tstamp;
+		if (lookup_mem)
+			ws->lookup_mem = lookup_mem;
+	}
+}
+
+static void
+eventdev_fops_tstamp_update(struct rte_eventdev *event_dev)
+{
+	struct rte_event_fp_ops *fp_op = rte_event_fp_ops + event_dev->data->dev_id;
+
+	fp_op->dequeue_burst = event_dev->dequeue_burst;
+}
+
+static void
+cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct cnxk_eth_dev *cnxk_eth_dev = dev->data->dev_private;
+	struct rte_eventdev *event_dev = cnxk_eth_dev->evdev_priv;
+	struct cnxk_sso_evdev *evdev = cnxk_sso_pmd_priv(event_dev);
+
+	evdev->rx_offloads |= flags;
+	if (ptp_en)
+		evdev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
+	else
+		evdev->tstamp[port_id] = NULL;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	eventdev_fops_tstamp_update(event_dev);
+}
+
+static int
+cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
+			       const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_sso_hwgrp_stash stash;
+	struct cn20k_eth_rxq *rxq;
+	void *lookup_mem;
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	if (rc)
+		return -EINVAL;
+
+	cnxk_eth_dev->cnxk_sso_ptp_tstamp_cb = cn20k_sso_tstamp_hdl_update;
+	cnxk_eth_dev->evdev_priv = (struct rte_eventdev *)(uintptr_t)event_dev;
+
+	rxq = eth_dev->data->rx_queues[0];
+	lookup_mem = rxq->lookup_mem;
+	cn20k_sso_set_priv_mem(event_dev, lookup_mem);
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	if (roc_feature_sso_has_stash() && dev->nb_event_ports > 1) {
+		stash.hwgrp = queue_conf->ev.queue_id;
+		stash.stash_offset = CN20K_SSO_DEFAULT_STASH_OFFSET;
+		stash.stash_count = CN20K_SSO_DEFAULT_STASH_LENGTH;
+		rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1);
+		if (rc < 0)
+			plt_warn("failed to configure HWGRP WQE stashing rc = %d", rc);
+	}
+
+	return 0;
+}
+
+static int
+cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id)
+{
+	int rc;
+
+	rc = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (rc)
+		return -EINVAL;
+
+	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -433,6 +545,12 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.port_unlink_profile = cn20k_sso_port_unlink_profile,
 	.timeout_ticks = cnxk_sso_timeout_ticks,
 
+	.eth_rx_adapter_caps_get = cn20k_sso_rx_adapter_caps_get,
+	.eth_rx_adapter_queue_add = cn20k_sso_rx_adapter_queue_add,
+	.eth_rx_adapter_queue_del = cn20k_sso_rx_adapter_queue_del,
+	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
+	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -509,4 +627,5 @@ RTE_PMD_REGISTER_KMOD_DEP(event_cn20k, "vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
-			      CNXK_SSO_STASH "=<string>");
+			      CNXK_SSO_STASH "=<string>"
+			      CNXK_SSO_FORCE_BP "=1");
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 5b6c558d5a..7a6363a89e 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -11,9 +11,13 @@
 struct __rte_cache_aligned cn20k_sso_hws {
 	uint64_t base;
 	uint32_t gw_wdata;
+	void *lookup_mem;
 	uint64_t gw_rdata;
 	uint8_t swtag_req;
 	uint8_t hws_id;
+	/* PTP timestamp */
+	struct cnxk_timesync_info **tstamp;
+	uint64_t meta_aura;
 	/* Add Work Fastpath data */
 	alignas(RTE_CACHE_LINE_SIZE) int64_t __rte_atomic *fc_mem;
 	int64_t __rte_atomic *fc_cache_space;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 16/22] event/cnxk: support CN20K Rx adapter fast path
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (13 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
                                 ` (5 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c           | 122 ++++++++++++-
 drivers/event/cnxk/cn20k_worker.c             |  54 ------
 drivers/event/cnxk/cn20k_worker.h             | 165 +++++++++++++++++-
 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c |  22 +++
 .../cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c   |  22 +++
 .../event/cnxk/deq/cn20k/deq_112_127_burst.c  |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_seg_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_112_127_tmo_burst.c    |  22 +++
 .../deq/cn20k/deq_112_127_tmo_seg_burst.c     |  22 +++
 .../event/cnxk/deq/cn20k/deq_16_31_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_32_47_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_burst.c      |  23 +++
 .../cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_48_63_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_64_79_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_80_95_burst.c    |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_seg_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_burst.c      |  22 +++
 .../cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c  |  22 +++
 .../event/cnxk/deq/cn20k/deq_96_111_burst.c   |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_seg_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_burst.c     |  22 +++
 .../cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c |  22 +++
 .../event/cnxk/deq/cn20k/deq_all_offload.c    |  65 +++++++
 drivers/event/cnxk/meson.build                |  43 +++++
 37 files changed, 1085 insertions(+), 69 deletions(-)
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
 create mode 100644 drivers/event/cnxk/deq/cn20k/deq_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 602fbd6359..408014036a 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -11,6 +11,9 @@
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
 
+#define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
+	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -165,21 +168,124 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+#if defined(RTE_ARCH_ARM64)
+static inline void
+cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
+{
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	const event_dequeue_burst_t sso_hws_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_reas_deq_tmo_seg_burst[NIX_RX_OFFLOAD_MAX] = {
+#define R(name, flags) [flags] = cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_seg_burst);
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_seg_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_deq_seg_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_seg_burst);
+		}
+	} else {
+		if (dev->rx_offloads & NIX_RX_REAS_F) {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+					       sso_hws_reas_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_reas_deq_tmo_burst);
+		} else {
+			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst, sso_hws_deq_burst);
+
+			if (dev->is_timeout_deq)
+				CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
+						       sso_hws_deq_tmo_burst);
+		}
+	}
+
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+
+static inline void
+cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
+{
+#if defined(CNXK_DIS_TMPLT_FUNC)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
+	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
+		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+#else
+	RTE_SET_USED(event_dev);
+#endif
+}
+#endif
 
 static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
-	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	cn20k_sso_fp_blk_fns_set(event_dev);
+	cn20k_sso_fp_tmplt_fns_set(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
-	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns)
-		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
-
 	event_dev->profile_switch = cn20k_sso_hws_profile_switch;
 	event_dev->preschedule_modify = cn20k_sso_hws_preschedule_modify;
 	event_dev->preschedule = cn20k_sso_hws_preschedule;
@@ -286,7 +392,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 		ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
 	} while (ptag & (BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
 
-	cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+	cn20k_sso_hws_get_work_empty(ws, &ev,
+				     (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 	if (is_pend && ev.u64)
 		if (flush_cb)
 			flush_cb(event_dev->data->dev_id, ev, args);
@@ -312,7 +419,8 @@ cn20k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
 
 	if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != SSO_TT_EMPTY) {
 		plt_write64(BIT_ULL(16) | 1, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-		cn20k_sso_hws_get_work_empty(ws, &ev, 0);
+		cn20k_sso_hws_get_work_empty(
+			ws, &ev, (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | NIX_RX_MULTI_SEG_F);
 		if (ev.u64) {
 			if (flush_cb)
 				flush_cb(event_dev->data->dev_id, ev, args);
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index ebfe863bc5..53daf3b4b0 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -423,57 +423,3 @@ cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type)
 	RTE_SET_USED(type);
 	plt_write64(ws->gw_wdata, ws->base + SSOW_LF_GWS_OP_PRF_GETWORK);
 }
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return 1;
-	}
-
-	return cn20k_sso_hws_get_work(ws, ev, 0);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn20k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
-		return ret;
-	}
-
-	ret = cn20k_sso_hws_get_work(ws, ev, 0);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn20k_sso_hws_get_work(ws, ev, 0);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			    uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index dd8b72bc53..9075073fd2 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -8,16 +8,64 @@
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
+#include "cn20k_rx.h"
 #include "cnxk_worker.h"
 
+/* CN20K Rx event fastpath */
+
+static __rte_always_inline void
+cn20k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag,
+		  const uint32_t flags, const void *const lookup_mem, uintptr_t cpth,
+		  uintptr_t sa_base)
+{
+	const uint64_t mbuf_init =
+		0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+	struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+	cn20k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init | ((uint64_t)port_id) << 48, cpth, sa_base, flags);
+}
+
+static void
+cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info *tstamp)
+{
+	uint64_t tstamp_ptr;
+	uint8_t laptr;
+
+	laptr = (uint8_t)*(uint64_t *)(u64 + (CNXK_SSO_WQE_LAYR_PTR * sizeof(uint64_t)));
+	if (laptr == sizeof(uint64_t)) {
+		/* Extracting tstamp, if PTP enabled*/
+		tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64) + CNXK_SSO_WQE_SG_PTR);
+		cn20k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, true,
+					 (uint64_t *)tstamp_ptr);
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
-	RTE_SET_USED(ws);
-	RTE_SET_USED(flags);
+	uintptr_t sa_base = 0;
 
 	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
 		 (u64[0] & 0xffffffff);
+	if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		uintptr_t cpth = 0;
+		uint64_t mbuf;
+
+		mbuf = u64[1] - sizeof(struct rte_mbuf);
+		rte_prefetch0((void *)mbuf);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(((struct rte_mbuf *)mbuf)->pool, (void **)&mbuf, 1, 1);
+
+		u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+		cn20k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xFFFFF, flags, ws->lookup_mem, cpth,
+				  sa_base);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
+		u64[1] = mbuf;
+	}
 }
 
 static __rte_always_inline uint16_t
@@ -150,11 +198,112 @@ int __rte_hot cn20k_sso_hws_preschedule_modify(void *port,
 					       enum rte_event_dev_preschedule_type type);
 void __rte_hot cn20k_sso_hws_preschedule(void *port, enum rte_event_dev_preschedule_type type);
 
-uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
-uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					       uint16_t nb_events, uint64_t timeout_ticks);
+#define R(name, flags)                                                                             \
+	uint16_t __rte_hot cn20k_sso_hws_deq_burst_##name(                                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_burst_##name(                                      \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_seg_burst_##name(                                     \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_seg_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_ca_seg_burst_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_deq_tmo_ca_seg_burst_##name(                              \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_burst_##name(                                    \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_burst_##name(                                 \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_seg_burst_##name(                                \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_seg_burst_##name(                            \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_ca_seg_burst_##name(                             \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);    \
+	uint16_t __rte_hot cn20k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(                         \
+		void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_DEQ(fn, flags)                                                                         \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(timeout_ticks);                                                       \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return 1;                                                                  \
+		}                                                                                  \
+		return cn20k_sso_hws_get_work(ws, ev, flags);                                      \
+	}
+
+#define SSO_DEQ_SEG(fn, flags) SSO_DEQ(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define SSO_DEQ_TMO(fn, flags)                                                                     \
+	static __rte_always_inline uint16_t fn(void *port, struct rte_event *ev,                   \
+					       uint64_t timeout_ticks)                             \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint16_t ret = 1;                                                                  \
+		uint64_t iter;                                                                     \
+		if (ws->swtag_req) {                                                               \
+			ws->swtag_req = 0;                                                         \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);       \
+			return ret;                                                                \
+		}                                                                                  \
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);                                       \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)                         \
+			ret = cn20k_sso_hws_get_work(ws, ev, flags);                               \
+		return ret;                                                                        \
+	}
+
+#define SSO_DEQ_TMO_SEG(fn, flags) SSO_DEQ_TMO(fn, flags | NIX_RX_MULTI_SEG_F)
+
+#define R(name, flags)                                                                             \
+	SSO_DEQ(cn20k_sso_hws_deq_##name, flags)                                                   \
+	SSO_DEQ(cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)                              \
+	SSO_DEQ_SEG(cn20k_sso_hws_deq_seg_##name, flags)                                           \
+	SSO_DEQ_SEG(cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO(cn20k_sso_hws_deq_tmo_##name, flags)                                           \
+	SSO_DEQ_TMO(cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)                      \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_deq_tmo_seg_##name, flags)                                   \
+	SSO_DEQ_TMO_SEG(cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define SSO_CMN_DEQ_BURST(fnb, fn, flags)                                                          \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+#define SSO_CMN_DEQ_SEG_BURST(fnb, fn, flags)                                                      \
+	uint16_t __rte_hot fnb(void *port, struct rte_event ev[], uint16_t nb_events,              \
+			       uint64_t timeout_ticks)                                             \
+	{                                                                                          \
+		RTE_SET_USED(nb_events);                                                           \
+		return fn(port, ev, timeout_ticks);                                                \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[],
+						       uint16_t nb_events, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[],
+							   uint16_t nb_events,
+							   uint64_t timeout_ticks);
 
 #endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
new file mode 100644
index 0000000000..f7e0e8fe71
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
new file mode 100644
index 0000000000..7d5d4823c3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		      cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
new file mode 100644
index 0000000000..1bdc4bc82d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
new file mode 100644
index 0000000000..d3ed5fcac0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_0_15_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                                             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,                                  \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)                                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,                             \
+			  cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_0_15
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
new file mode 100644
index 0000000000..29c21441cf
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
new file mode 100644
index 0000000000..004b5ecb95
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
new file mode 100644
index 0000000000..d544b39e9e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
new file mode 100644
index 0000000000..ba7a1207ad
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_112_127_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_112_127
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
new file mode 100644
index 0000000000..eb7382e9d9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
new file mode 100644
index 0000000000..770b7221e6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
new file mode 100644
index 0000000000..1e71d22fc3
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
new file mode 100644
index 0000000000..1a9e7efa0a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_16_31_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_16_31
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
new file mode 100644
index 0000000000..3d51bd6659
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F_)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
new file mode 100644
index 0000000000..851b5b7d31
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
new file mode 100644
index 0000000000..038ba726a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_burst.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name,                   \
+			  flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
new file mode 100644
index 0000000000..68fb3ff53d
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_32_47_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_32_47
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
new file mode 100644
index 0000000000..84f3ccd39c
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
new file mode 100644
index 0000000000..417f622412
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
new file mode 100644
index 0000000000..7fbea69134
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+		  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
new file mode 100644
index 0000000000..3bee216768
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_48_63_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_48_63
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
new file mode 100644
index 0000000000..9b341a0df5
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
new file mode 100644
index 0000000000..1f051f74a9
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			   cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
new file mode 100644
index 0000000000..c134e27f25
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
new file mode 100644
index 0000000000..849e8e12fc
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_64_79_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+		 cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_64_79
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
new file mode 100644
index 0000000000..9724caf5d6
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
new file mode 100644
index 0000000000..997c208511
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
new file mode 100644
index 0000000000..bcf32e646b
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
new file mode 100644
index 0000000000..b24e73439a
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_80_95_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_80_95
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
new file mode 100644
index 0000000000..c03d034b66
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_burst_##name,                      \
+			  cn20k_sso_hws_deq_##name, flags)                     \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_burst_##name,                 \
+			  cn20k_sso_hws_reas_deq_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
new file mode 100644
index 0000000000..b37ef7a998
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_deq_seg_burst_##name,              \
+			      cn20k_sso_hws_deq_seg_##name, flags)             \
+	SSO_CMN_DEQ_SEG_BURST(cn20k_sso_hws_reas_deq_seg_burst_##name,         \
+			    cn20k_sso_hws_reas_deq_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
new file mode 100644
index 0000000000..da76b589a0
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_burst_##name,                  \
+			  cn20k_sso_hws_deq_tmo_##name, flags)                 \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_burst_##name,             \
+			  cn20k_sso_hws_reas_deq_tmo_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
new file mode 100644
index 0000000000..3a8c02e4d2
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_96_111_tmo_seg_burst.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define R(name, flags)                                                         \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_deq_tmo_seg_burst_##name,              \
+			  cn20k_sso_hws_deq_tmo_seg_##name, flags)             \
+	SSO_CMN_DEQ_BURST(cn20k_sso_hws_reas_deq_tmo_seg_burst_##name,         \
+			cn20k_sso_hws_reas_deq_tmo_seg_##name, flags | NIX_RX_REAS_F)
+
+NIX_RX_FASTPATH_MODES_96_111
+#undef R
+
+#endif
diff --git a/drivers/event/cnxk/deq/cn20k/deq_all_offload.c b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
new file mode 100644
index 0000000000..3983736b7e
--- /dev/null
+++ b/drivers/event/cnxk/deq/cn20k/deq_all_offload.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload(void *port, struct rte_event ev[], uint16_t nb_events,
+				    uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst_all_offload_tst(void *port, struct rte_event ev[], uint16_t nb_events,
+					uint64_t timeout_ticks)
+{
+	const uint32_t flags = (NIX_RX_OFFLOAD_RSS_F | NIX_RX_OFFLOAD_PTYPE_F |
+				NIX_RX_OFFLOAD_CHECKSUM_F | NIX_RX_OFFLOAD_MARK_UPDATE_F |
+				NIX_RX_OFFLOAD_TSTAMP_F | NIX_RX_OFFLOAD_VLAN_STRIP_F |
+				NIX_RX_OFFLOAD_SECURITY_F | NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F);
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1;
+	uint64_t iter;
+
+	RTE_SET_USED(nb_events);
+	if (ws->swtag_req) {
+		ws->swtag_req = 0;
+		ws->gw_rdata = cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, flags);
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, flags);
+
+	return ret;
+}
+
+#endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index d0dc2320e1..a2bafab268 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -234,6 +234,49 @@ sources += files(
         'cn20k_eventdev.c',
         'cn20k_worker.c',
 )
+
+if host_machine.cpu_family().startswith('aarch') and not disable_template
+sources += files(
+        'deq/cn20k/deq_0_15_burst.c',
+        'deq/cn20k/deq_16_31_burst.c',
+        'deq/cn20k/deq_32_47_burst.c',
+        'deq/cn20k/deq_48_63_burst.c',
+        'deq/cn20k/deq_64_79_burst.c',
+        'deq/cn20k/deq_80_95_burst.c',
+        'deq/cn20k/deq_96_111_burst.c',
+        'deq/cn20k/deq_112_127_burst.c',
+        'deq/cn20k/deq_0_15_seg_burst.c',
+        'deq/cn20k/deq_16_31_seg_burst.c',
+        'deq/cn20k/deq_32_47_seg_burst.c',
+        'deq/cn20k/deq_48_63_seg_burst.c',
+        'deq/cn20k/deq_64_79_seg_burst.c',
+        'deq/cn20k/deq_80_95_seg_burst.c',
+        'deq/cn20k/deq_96_111_seg_burst.c',
+        'deq/cn20k/deq_112_127_seg_burst.c',
+        'deq/cn20k/deq_0_15_tmo_burst.c',
+        'deq/cn20k/deq_16_31_tmo_burst.c',
+        'deq/cn20k/deq_32_47_tmo_burst.c',
+        'deq/cn20k/deq_48_63_tmo_burst.c',
+        'deq/cn20k/deq_64_79_tmo_burst.c',
+        'deq/cn20k/deq_80_95_tmo_burst.c',
+        'deq/cn20k/deq_96_111_tmo_burst.c',
+        'deq/cn20k/deq_112_127_tmo_burst.c',
+        'deq/cn20k/deq_0_15_tmo_seg_burst.c',
+        'deq/cn20k/deq_16_31_tmo_seg_burst.c',
+        'deq/cn20k/deq_32_47_tmo_seg_burst.c',
+        'deq/cn20k/deq_48_63_tmo_seg_burst.c',
+        'deq/cn20k/deq_64_79_tmo_seg_burst.c',
+        'deq/cn20k/deq_80_95_tmo_seg_burst.c',
+        'deq/cn20k/deq_96_111_tmo_seg_burst.c',
+        'deq/cn20k/deq_112_127_tmo_seg_burst.c',
+        'deq/cn20k/deq_all_offload.c',
+)
+
+else
+sources += files(
+        'deq/cn20k/deq_all_offload.c',
+)
+endif
 endif
 
 extra_flags = ['-flax-vector-conversions', '-Wno-strict-aliasing']
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 17/22] event/cnxk: support CN20K Tx adapter
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (14 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
                                 ` (4 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c  | 126 +++++++++++++++++++++++++++
 drivers/event/cnxk/cn20k_eventdev.h  |   4 +
 drivers/event/cnxk/cn20k_tx_worker.h |  16 ++++
 3 files changed, 146 insertions(+)
 create mode 100644 drivers/event/cnxk/cn20k_tx_worker.h

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 408014036a..509c6ea630 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -6,6 +6,7 @@
 
 #include "cn20k_ethdev.h"
 #include "cn20k_eventdev.h"
+#include "cn20k_tx_worker.h"
 #include "cn20k_worker.h"
 #include "cnxk_common.h"
 #include "cnxk_eventdev.h"
@@ -168,6 +169,35 @@ cn20k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
 	return roc_sso_rsrc_init(&dev->sso, hws, hwgrp, nb_tim_lfs);
 }
 
+static int
+cn20k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	int i;
+
+	if (dev->tx_adptr_data == NULL)
+		return 0;
+
+	for (i = 0; i < dev->nb_event_ports; i++) {
+		struct cn20k_sso_hws *ws = event_dev->data->ports[i];
+		void *ws_cookie;
+
+		ws_cookie = cnxk_sso_hws_get_cookie(ws);
+		ws_cookie = rte_realloc_socket(ws_cookie,
+					       sizeof(struct cnxk_sso_hws_cookie) +
+						       sizeof(struct cn20k_sso_hws) +
+						       dev->tx_adptr_data_sz,
+					       RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (ws_cookie == NULL)
+			return -ENOMEM;
+		ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+		memcpy(&ws->tx_adptr_data, dev->tx_adptr_data, dev->tx_adptr_data_sz);
+		event_dev->data->ports[i] = ws;
+	}
+
+	return 0;
+}
+
 #if defined(RTE_ARCH_ARM64)
 static inline void
 cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
@@ -634,6 +664,95 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+			      uint32_t *caps)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		*caps = 0;
+	else
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+	return 0;
+}
+
+static void
+cn20k_sso_txq_fc_update(const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cn20k_eth_txq *txq;
+	struct roc_nix_sq *sq;
+	int i;
+
+	if (tx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+			cn20k_sso_txq_fc_update(eth_dev, i);
+	} else {
+		uint16_t sqes_per_sqb;
+
+		sq = &cnxk_eth_dev->sqs[tx_queue_id];
+		txq = eth_dev->data->tx_queues[tx_queue_id];
+		sqes_per_sqb = 1U << txq->sqes_per_sqb_log2;
+		if (cnxk_eth_dev->tx_offloads & RTE_ETH_TX_OFFLOAD_SECURITY)
+			sq->nb_sqb_bufs_adj -= (cnxk_eth_dev->outb.nb_desc / sqes_per_sqb);
+		txq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;
+	}
+}
+
+static int
+cn20k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint64_t tx_offloads;
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+
+	/* Can't enable tstamp if all the ports don't have it enabled. */
+	tx_offloads = cnxk_eth_dev->tx_offload_flags;
+	if (dev->tx_adptr_configured) {
+		uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+		uint8_t tstmp_ena = !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+		if (tstmp_ena && !tstmp_req)
+			dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+		else if (!tstmp_ena && tstmp_req)
+			tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+	}
+
+	dev->tx_offloads |= tx_offloads;
+	cn20k_sso_txq_fc_update(eth_dev, tx_queue_id);
+	rc = cn20k_sso_updt_tx_adptr_data(event_dev);
+	if (rc < 0)
+		return rc;
+	cn20k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+	dev->tx_adptr_configured = 1;
+
+	return 0;
+}
+
+static int
+cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
+			       const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+	int rc;
+
+	RTE_SET_USED(id);
+	rc = cnxk_sso_tx_adapter_queue_del(event_dev, eth_dev, tx_queue_id);
+	if (rc < 0)
+		return rc;
+	return cn20k_sso_updt_tx_adptr_data(event_dev);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -659,6 +778,13 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
+	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
+	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
+	.eth_tx_adapter_start = cnxk_sso_tx_adapter_start,
+	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
+	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
diff --git a/drivers/event/cnxk/cn20k_eventdev.h b/drivers/event/cnxk/cn20k_eventdev.h
index 7a6363a89e..8ea2878fa5 100644
--- a/drivers/event/cnxk/cn20k_eventdev.h
+++ b/drivers/event/cnxk/cn20k_eventdev.h
@@ -25,6 +25,10 @@ struct __rte_cache_aligned cn20k_sso_hws {
 	uintptr_t grp_base;
 	uint16_t xae_waes;
 	int32_t xaq_lmt;
+	/* Tx Fastpath data */
+	alignas(RTE_CACHE_LINE_SIZE) uintptr_t lmt_base;
+	uint64_t lso_tun_fmt;
+	uint8_t tx_adptr_data[];
 };
 
 #endif /* __CN20K_EVENTDEV_H__ */
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
new file mode 100644
index 0000000000..63fbdf5328
--- /dev/null
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2024 Marvell.
+ */
+
+#ifndef __CN20K_TX_WORKER_H__
+#define __CN20K_TX_WORKER_H__
+
+#include <rte_eventdev.h>
+#include <rte_vect.h>
+
+#include "cn20k_eventdev.h"
+#include "cn20k_tx.h"
+#include "cnxk_eventdev_dp.h"
+#include <rte_event_eth_tx_adapter.h>
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 18/22] event/cnxk: support CN20K Tx adapter fast path
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (15 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 19/22] common/cnxk: add SSO event aggregator pbhagavatula
                                 ` (3 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c          |  29 +++
 drivers/event/cnxk/cn20k_tx_worker.h         | 176 +++++++++++++++++++
 drivers/event/cnxk/meson.build               |  20 +++
 drivers/event/cnxk/tx/cn20k/tx_0_15.c        |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c    |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127.c     |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95.c       |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c   |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111.c      |  18 ++
 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c  |  19 ++
 drivers/event/cnxk/tx/cn20k/tx_all_offload.c |  40 +++++
 20 files changed, 561 insertions(+)
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
 create mode 100644 drivers/event/cnxk/tx/cn20k/tx_all_offload.c

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 509c6ea630..5d49a5e5c6 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -15,6 +15,9 @@
 #define CN20K_SET_EVDEV_DEQ_OP(dev, deq_op, deq_ops)                                               \
 	deq_op = deq_ops[dev->rx_offloads & (NIX_RX_OFFLOAD_MAX - 1)]
 
+#define CN20K_SET_EVDEV_ENQ_OP(dev, enq_op, enq_ops)                                               \
+	enq_op = enq_ops[dev->tx_offloads & (NIX_TX_OFFLOAD_MAX - 1)]
+
 static void *
 cn20k_sso_init_hws_mem(void *arg, uint8_t port_id)
 {
@@ -253,6 +256,19 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue_t sso_hws_tx_adptr_enq_seg[NIX_TX_OFFLOAD_MAX] = {
+#define T(name, sz, flags) [flags] = cn20k_sso_hws_tx_adptr_enq_seg_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
 		if (dev->rx_offloads & NIX_RX_REAS_F) {
 			CN20K_SET_EVDEV_DEQ_OP(dev, event_dev->dequeue_burst,
@@ -285,6 +301,12 @@ cn20k_sso_fp_tmplt_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq_seg);
+	else
+		CN20K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue, sso_hws_tx_adptr_enq);
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 #else
 	RTE_SET_USED(event_dev);
 #endif
@@ -299,6 +321,13 @@ cn20k_sso_fp_blk_fns_set(struct rte_eventdev *event_dev)
 	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload;
 	if (dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)
 		event_dev->dequeue_burst = cn20k_sso_hws_deq_burst_all_offload_tst;
+	event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload;
+	if (dev->tx_offloads & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | NIX_TX_OFFLOAD_VLAN_QINQ_F |
+				NIX_TX_OFFLOAD_TSO_F | NIX_TX_OFFLOAD_TSTAMP_F)) {
+		event_dev->txa_enqueue = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+		event_dev->txa_enqueue_same_dest = cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst;
+	}
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index 63fbdf5328..c8ab560b0e 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -13,4 +13,180 @@
 #include "cnxk_eventdev_dp.h"
 #include <rte_event_eth_tx_adapter.h>
 
+/* CN20K Tx event fastpath */
+
+static __rte_always_inline struct cn20k_eth_txq *
+cn20k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
+{
+	return (struct cn20k_eth_txq *)(txq_data[(txq_data[m->port] >> 48) +
+						 rte_event_eth_tx_adapter_txq_get(m)] &
+					(BIT_ULL(48) - 1));
+}
+
+static __rte_always_inline void
+cn20k_sso_txq_fc_wait(const struct cn20k_eth_txq *txq)
+{
+	int64_t avail;
+
+#ifdef RTE_ARCH_ARM64
+	int64_t val;
+
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.gt .Ldne%=				\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldxr %[val], [%[addr]]			\n"
+		     "		sub %[val], %[adj], %[val]		\n"
+		     "		lsl %[refill], %[val], %[shft]		\n"
+		     "		sub %[refill], %[refill], %[val]	\n"
+		     "		cmp %[refill], #0x0			\n"
+		     "		b.le .Lrty%=				\n"
+		     ".Ldne%=:						\n"
+		     : [refill] "=&r"(avail), [val] "=&r" (val)
+		     : [addr] "r" (txq->fc_mem), [adj] "r" (txq->nb_sqb_bufs_adj),
+		       [shft] "r" (txq->sqes_per_sqb_log2)
+		     : "memory");
+#else
+	do {
+		avail = txq->nb_sqb_bufs_adj -
+			rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+						 rte_memory_order_relaxed);
+	} while (((avail << txq->sqes_per_sqb_log2) - avail) <= 0);
+#endif
+}
+
+static __rte_always_inline int32_t
+cn20k_sso_sq_depth(const struct cn20k_eth_txq *txq)
+{
+	int32_t avail = (int32_t)txq->nb_sqb_bufs_adj -
+			(int32_t)rte_atomic_load_explicit((uint64_t __rte_atomic *)txq->fc_mem,
+							  rte_memory_order_relaxed);
+	return (avail << txq->sqes_per_sqb_log2) - avail;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+		 uintptr_t lmt_addr, uint8_t sched_type, const uint64_t *txq_data,
+		 const uint32_t flags)
+{
+	uint8_t lnum = 0, loff = 0, shft = 0;
+	struct rte_mbuf *extm = NULL;
+	struct cn20k_eth_txq *txq;
+	uintptr_t laddr;
+	uint16_t segdw;
+	uintptr_t pa;
+	bool sec;
+
+	txq = cn20k_sso_hws_xtract_meta(m, txq_data);
+	if (cn20k_sso_sq_depth(txq) <= 0)
+		return 0;
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
+		handle_tx_completion_pkts(txq, 1);
+
+	cn20k_nix_tx_skeleton(txq, cmd, flags, 0);
+	/* Perform header writes before barrier
+	 * for TSO
+	 */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn20k_nix_xmit_prepare_tso(m, flags);
+
+	cn20k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec, txq->mark_flag,
+			       txq->mark_fmt);
+
+	laddr = lmt_addr;
+	/* Prepare CPT instruction and get nixtx addr if
+	 * it is for CPT on same lmtline.
+	 */
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff, &shft, txq->sa_base,
+				   flags);
+
+	/* Move NIX desc to LMT/NIXTX area */
+	cn20k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F)
+		segdw = cn20k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
+	else
+		segdw = cn20k_nix_tx_ext_subs(flags) + 2;
+
+	cn20k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		pa = txq->cpt_io_addr | 3 << 4;
+	else
+		pa = txq->io_addr | ((segdw - 1) << 4);
+
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
+
+	cn20k_sso_txq_fc_wait(txq);
+	if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+		cn20k_nix_sec_fc_wait_one(txq);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	/* Memory barrier to make sure lmtst store completes */
+	rte_io_wmb();
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+		cn20k_nix_free_extmbuf(extm);
+
+	return 1;
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
+		       const uint64_t *txq_data, const uint32_t flags)
+{
+	struct rte_mbuf *m;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	m = ev->mbuf;
+	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
+}
+
+#define T(name, sz, flags)                                                                         \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_##name(void *port, struct rte_event ev[],    \
+							     uint16_t nb_events);                  \
+	uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_##name(                                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
+#define SSO_TX(fn, sz, flags)                                                                      \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		uint64_t cmd[sz];                                                                  \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data, flags);         \
+	}
+
+#define SSO_TX_SEG(fn, sz, flags)                                                                  \
+	uint16_t __rte_hot fn(void *port, struct rte_event ev[], uint16_t nb_events)               \
+	{                                                                                          \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];                               \
+		struct cn20k_sso_hws *ws = port;                                                   \
+		RTE_SET_USED(nb_events);                                                           \
+		return cn20k_sso_hws_event_tx(ws, &ev[0], cmd,                                     \
+					      (const uint64_t *)ws->tx_adptr_data,                 \
+					      (flags) | NIX_TX_MULTI_SEG_F);                       \
+	}
+
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[],
+							      uint16_t nb_events);
+uint16_t __rte_hot cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+								  uint16_t nb_events);
+
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index a2bafab268..8aaf8116f7 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -272,9 +272,29 @@ sources += files(
         'deq/cn20k/deq_all_offload.c',
 )
 
+sources += files(
+        'tx/cn20k/tx_0_15.c',
+        'tx/cn20k/tx_16_31.c',
+        'tx/cn20k/tx_32_47.c',
+        'tx/cn20k/tx_48_63.c',
+        'tx/cn20k/tx_64_79.c',
+        'tx/cn20k/tx_80_95.c',
+        'tx/cn20k/tx_96_111.c',
+        'tx/cn20k/tx_112_127.c',
+        'tx/cn20k/tx_0_15_seg.c',
+        'tx/cn20k/tx_16_31_seg.c',
+        'tx/cn20k/tx_32_47_seg.c',
+        'tx/cn20k/tx_48_63_seg.c',
+        'tx/cn20k/tx_64_79_seg.c',
+        'tx/cn20k/tx_80_95_seg.c',
+        'tx/cn20k/tx_96_111_seg.c',
+        'tx/cn20k/tx_112_127_seg.c',
+        'tx/cn20k/tx_all_offload.c',
+)
 else
 sources += files(
         'deq/cn20k/deq_all_offload.c',
+        'tx/cn20k/tx_all_offload.c',
 )
 endif
 endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15.c b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
new file mode 100644
index 0000000000..b681bc8ab0
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
new file mode 100644
index 0000000000..1dacb63d4b
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_0_15_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_0_15
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127.c b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
new file mode 100644
index 0000000000..abdb8b76a1
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
new file mode 100644
index 0000000000..c39d331b25
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_112_127_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_112_127
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31.c b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
new file mode 100644
index 0000000000..5b88c47914
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
new file mode 100644
index 0000000000..13f00ac478
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_16_31_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_16_31
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47.c b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
new file mode 100644
index 0000000000..1f6008c425
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
new file mode 100644
index 0000000000..587f22df3a
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_32_47_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_32_47
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63.c b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
new file mode 100644
index 0000000000..c712825417
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
new file mode 100644
index 0000000000..1fc11ec904
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_48_63_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_48_63
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79.c b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
new file mode 100644
index 0000000000..0e427f79d8
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
new file mode 100644
index 0000000000..6e1ae41b26
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_64_79_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_64_79
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95.c b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
new file mode 100644
index 0000000000..8c87d2341d
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
new file mode 100644
index 0000000000..43a143f4bd
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_80_95_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_80_95
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111.c b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
new file mode 100644
index 0000000000..1a43af8b02
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags) SSO_TX(cn20k_sso_hws_tx_adptr_enq_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
new file mode 100644
index 0000000000..e0e1d8a4ef
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_96_111_seg.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if !defined(CNXK_DIS_TMPLT_FUNC)
+
+#define T(name, sz, flags)                                                     \
+	SSO_TX_SEG(cn20k_sso_hws_tx_adptr_enq_seg_##name, sz, flags)
+
+NIX_TX_FASTPATH_MODES_96_111
+#undef T
+
+#endif
diff --git a/drivers/event/cnxk/tx/cn20k/tx_all_offload.c b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
new file mode 100644
index 0000000000..d2158a4256
--- /dev/null
+++ b/drivers/event/cnxk/tx/cn20k/tx_all_offload.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+#include "cn20k_tx_worker.h"
+
+#ifdef _ROC_API_H_
+#error "roc_api.h is included"
+#endif
+
+#if defined(CNXK_DIS_TMPLT_FUNC)
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload(void *port, struct rte_event ev[], uint16_t nb_events)
+{
+	const uint32_t flags = (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_MBUF_NOFF_F |
+				NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tx_adptr_enq_seg_all_offload_tst(void *port, struct rte_event ev[],
+					       uint16_t nb_events)
+{
+	const uint32_t flags =
+		(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
+		 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_MBUF_NOFF_F | NIX_TX_OFFLOAD_TSO_F |
+		 NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_SECURITY_F | NIX_TX_MULTI_SEG_F);
+	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
+
+	struct cn20k_sso_hws *ws = port;
+	RTE_SET_USED(nb_events);
+	return cn20k_sso_hws_event_tx(ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, flags);
+}
+
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 19/22] common/cnxk: add SSO event aggregator
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (16 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
                                 ` (2 subsequent siblings)
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add configuration APIs for CN20K SSO event
aggregator which allows SSO to generate event
vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/sso.h        |  33 ++++
 drivers/common/cnxk/roc_mbox.h      |  21 +++
 drivers/common/cnxk/roc_model.h     |  13 ++
 drivers/common/cnxk/roc_nix_queue.c |   5 -
 drivers/common/cnxk/roc_sso.c       | 230 +++++++++++++++++++++++++++-
 drivers/common/cnxk/roc_sso.h       |  19 ++-
 drivers/common/cnxk/roc_sso_priv.h  |   4 +
 drivers/common/cnxk/version.map     |   4 +
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/hw/sso.h b/drivers/common/cnxk/hw/sso.h
index 09b8d4955f..79337a8a3b 100644
--- a/drivers/common/cnxk/hw/sso.h
+++ b/drivers/common/cnxk/hw/sso.h
@@ -146,6 +146,7 @@
 #define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull)
 #define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull)
 #define SSO_LF_GGRP_QCTL	 (0x20ull)
+#define SSO_LF_GGRP_TAG_CFG	 (0x40ull)
 #define SSO_LF_GGRP_EXE_DIS	 (0x80ull)
 #define SSO_LF_GGRP_INT		 (0x100ull)
 #define SSO_LF_GGRP_INT_W1S	 (0x108ull)
@@ -159,6 +160,10 @@
 #define SSO_LF_GGRP_MISC_CNT	 (0x200ull)
 #define SSO_LF_GGRP_OP_AW_LMTST	 (0x400ull)
 
+#define SSO_LF_GGRP_AGGR_CFG	    (0x300ull)
+#define SSO_LF_GGRP_AGGR_CTX_BASE   (0x308ull)
+#define SSO_LF_GGRP_AGGR_CTX_INSTOP (0x310ull)
+
 #define SSO_AF_IAQ_FREE_CNT_MASK      0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_MASK     0x3FFFull
 #define SSO_AF_IAQ_RSVD_FREE_SHIFT    16
@@ -230,5 +235,33 @@
 #define SSO_TT_ATOMIC	(0x1ull)
 #define SSO_TT_UNTAGGED (0x2ull)
 #define SSO_TT_EMPTY	(0x3ull)
+#define SSO_TT_AGG	(0x3ull)
+
+#define SSO_LF_AGGR_INSTOP_FLUSH	(0x0ull)
+#define SSO_LF_AGGR_INSTOP_EVICT	(0x1ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_FLUSH (0x2ull)
+#define SSO_LF_AGGR_INSTOP_GLOBAL_EVICT (0x3ull)
+
+#define SSO_AGGR_CTX_SZ	    16
+#define SSO_AGGR_NUM_CTX(a) (1 << (a + 6))
+#define SSO_AGGR_MIN_CTX    SSO_AGGR_NUM_CTX(0)
+#define SSO_AGGR_MAX_CTX    SSO_AGGR_NUM_CTX(10)
+#define SSO_AGGR_DEF_TMO    0x3Full
+
+struct sso_agq_ctx {
+	uint64_t ena : 1;
+	uint64_t rsvd_1_3 : 3;
+	uint64_t vwqe_aura : 17;
+	uint64_t rsvd_21_31 : 11;
+	uint64_t tag : 32;
+	uint64_t tt : 2;
+	uint64_t rsvd_66_67 : 2;
+	uint64_t swqe_tag : 12;
+	uint64_t max_vsize_exp : 4;
+	uint64_t vtimewait : 12;
+	uint64_t xqe_type : 4;
+	uint64_t cnt_ena : 1;
+	uint64_t rsvd_101_127 : 27;
+};
 
 #endif /* __SSO_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 645da563c2..960535eca0 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -147,6 +147,10 @@ struct mbox_msghdr {
 	  msg_rsp)                                                             \
 	M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config,                   \
 	  sso_grp_stash_cfg, msg_rsp)                                          \
+	M(SSO_AGGR_SET_CONFIG, 0x615, sso_aggr_setconfig, sso_aggr_setconfig,  \
+	  msg_rsp)                                                             \
+	M(SSO_AGGR_GET_STATS, 0x616, sso_aggr_get_stats, sso_info_req,         \
+	  sso_aggr_stats)                                                      \
 	M(SSO_GET_HW_INFO, 0x617, sso_get_hw_info, msg_req, sso_hw_info)       \
 	/* TIM mbox IDs (range 0x800 - 0x9FF) */                               \
 	M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req,                 \
@@ -2189,6 +2193,13 @@ struct sso_grp_stash_cfg {
 	uint8_t __io num_linesm1 : 4;
 };
 
+struct sso_aggr_setconfig {
+	struct mbox_msghdr hdr;
+	uint16_t __io npa_pf_func;
+	uint16_t __io hwgrp;
+	uint64_t __io rsvd[2];
+};
+
 struct sso_grp_stats {
 	struct mbox_msghdr hdr;
 	uint16_t __io grp;
@@ -2208,6 +2219,16 @@ struct sso_hws_stats {
 	uint64_t __io arbitration;
 };
 
+struct sso_aggr_stats {
+	struct mbox_msghdr hdr;
+	uint16_t __io grp;
+	uint64_t __io flushed;
+	uint64_t __io completed;
+	uint64_t __io npa_fail;
+	uint64_t __io timeout;
+	uint64_t __io rsvd[4];
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h
index 4e686bea2c..0de141b0cc 100644
--- a/drivers/common/cnxk/roc_model.h
+++ b/drivers/common/cnxk/roc_model.h
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include "roc_bits.h"
+#include "roc_constants.h"
 
 extern struct roc_model *roc_model;
 
@@ -157,6 +158,18 @@ roc_model_is_cn20k(void)
 	return roc_model_runtime_is_cn20k();
 }
 
+static inline uint16_t
+roc_model_optimal_align_sz(void)
+{
+	if (roc_model_is_cn9k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn10k())
+		return ROC_ALIGN;
+	if (roc_model_is_cn20k())
+		return ROC_ALIGN << 1;
+	return 128;
+}
+
 static inline uint64_t
 roc_model_is_cn98xx(void)
 {
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 06029275af..e852211ba4 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -794,9 +794,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 		aq->rq.good_utag = rq->tag_mask >> 24;
 		aq->rq.bad_utag = rq->tag_mask >> 24;
 		aq->rq.ltag = rq->tag_mask & BITMASK_ULL(24, 0);
-
-		if (rq->vwqe_ena)
-			aq->rq.wqe_aura = roc_npa_aura_handle_to_aura(rq->vwqe_aura_handle);
 	} else {
 		/* CQ mode */
 		aq->rq.sso_ena = 0;
@@ -881,8 +878,6 @@ nix_rq_cfg(struct dev *dev, struct roc_nix_rq *rq, uint16_t qints, bool cfg, boo
 			aq->rq_mask.good_utag = ~aq->rq_mask.good_utag;
 			aq->rq_mask.bad_utag = ~aq->rq_mask.bad_utag;
 			aq->rq_mask.ltag = ~aq->rq_mask.ltag;
-			if (rq->vwqe_ena)
-				aq->rq_mask.wqe_aura = ~aq->rq_mask.wqe_aura;
 		} else {
 			/* CQ mode */
 			aq->rq_mask.sso_ena = ~aq->rq_mask.sso_ena;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 45cf6fc39e..4996329018 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -500,9 +500,231 @@ roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 	mbox_put(mbox);
 }
 
+static void
+sso_agq_op_wait(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	uint64_t reg;
+
+	reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	while (reg & BIT_ULL(2)) {
+		plt_delay_us(100);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) +
+				 SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	}
+}
+
+int
+roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_agq_data *data)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	uint32_t cnt, off;
+	struct mbox *mbox;
+	uintptr_t ptr;
+	uint64_t reg;
+	int rc;
+
+	if (sso->agg_mem[hwgrp] == 0) {
+		mbox = mbox_get(sso->dev.mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			mbox_process(mbox);
+			req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+			if (req == NULL) {
+				plt_err("Failed to allocate AGQ config mbox.");
+				mbox_put(mbox);
+				return -EIO;
+			}
+		}
+
+		req->hwgrp = hwgrp;
+		req->npa_pf_func = idev_npa_pffunc_get();
+		rc = mbox_process(mbox);
+		if (rc < 0) {
+			plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+			mbox_put(mbox);
+			return rc;
+		}
+
+		mbox_put(mbox);
+
+		sso->agg_mem[hwgrp] =
+			(uintptr_t)plt_zmalloc(SSO_AGGR_MIN_CTX * sizeof(struct sso_agq_ctx),
+					       roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0)
+			return -ENOMEM;
+		sso->agg_cnt[hwgrp] = SSO_AGGR_MIN_CTX;
+		sso->agg_used[hwgrp] = 0;
+		plt_wmb();
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = (plt_log2_u32(SSO_AGGR_MIN_CTX) - 6) << 16;
+		reg |= (SSO_AGGR_DEF_TMO << 4) | 1;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	}
+
+	if (sso->agg_cnt[hwgrp] >= SSO_AGGR_MAX_CTX)
+		return -ENOSPC;
+
+	if (sso->agg_cnt[hwgrp] == sso->agg_used[hwgrp]) {
+		ptr = sso->agg_mem[hwgrp];
+		cnt = sso->agg_cnt[hwgrp] << 1;
+		sso->agg_mem[hwgrp] = (uintptr_t)plt_zmalloc(cnt * sizeof(struct sso_agq_ctx),
+							     roc_model_optimal_align_sz());
+		if (sso->agg_mem[hwgrp] == 0) {
+			sso->agg_mem[hwgrp] = ptr;
+			return -ENOMEM;
+		}
+
+		memcpy((void *)sso->agg_mem[hwgrp], (void *)ptr,
+		       sso->agg_cnt[hwgrp] * sizeof(struct sso_agq_ctx));
+		plt_wmb();
+		sso_agq_op_wait(roc_sso, hwgrp);
+		/* Base address has changed, evict old entries. */
+		plt_write64(sso->agg_mem[hwgrp],
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+		reg = plt_read64(roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg &= ~GENMASK_ULL(19, 16);
+		reg |= (uint64_t)(plt_log2_u32(cnt) - 6) << 16;
+		plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+		reg = SSO_LF_AGGR_INSTOP_GLOBAL_EVICT << 4;
+		plt_write64(reg,
+			    roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+		sso_agq_op_wait(roc_sso, hwgrp);
+		plt_free((void *)ptr);
+
+		sso->agg_cnt[hwgrp] = cnt;
+		off = sso->agg_used[hwgrp];
+	} else {
+		ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+		for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+			if (!ctx[cnt].ena)
+				break;
+		}
+		if (cnt == sso->agg_cnt[hwgrp])
+			return -EINVAL;
+		off = cnt;
+	}
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += off;
+	ctx->ena = 1;
+	ctx->tt = data->tt;
+	ctx->tag = data->tag;
+	ctx->swqe_tag = data->stag;
+	ctx->cnt_ena = data->cnt_ena;
+	ctx->xqe_type = data->xqe_type;
+	ctx->vtimewait = data->vwqe_wait_tmo;
+	ctx->vwqe_aura = data->vwqe_aura;
+	ctx->max_vsize_exp = data->vwqe_max_sz_exp - 2;
+
+	plt_wmb();
+	sso->agg_used[hwgrp]++;
+
+	return 0;
+}
+
+void
+roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint64_t reg;
+
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	ctx += agq_id;
+
+	if (!ctx->ena)
+		return;
+
+	reg = SSO_LF_AGGR_INSTOP_FLUSH << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+
+	memset(ctx, 0, sizeof(struct sso_agq_ctx));
+	plt_wmb();
+	sso->agg_used[hwgrp]--;
+
+	/* Flush the context from CTX Cache */
+	reg = SSO_LF_AGGR_INSTOP_EVICT << 4;
+	reg |= (uint64_t)(agq_id << 8);
+
+	plt_write64(reg, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_INSTOP);
+	sso_agq_op_wait(roc_sso, hwgrp);
+}
+
+void
+roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_aggr_setconfig *req;
+	struct sso_agq_ctx *ctx;
+	struct mbox *mbox;
+	uint32_t cnt;
+	int rc;
+
+	if (!roc_sso->feat.eva_present)
+		return;
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CFG);
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (cnt = 0; cnt < sso->agg_cnt[hwgrp]; cnt++) {
+		if (!ctx[cnt].ena)
+			continue;
+		roc_sso_hwgrp_agq_free(roc_sso, hwgrp, cnt);
+	}
+
+	plt_write64(0, roc_sso_hwgrp_base_get(roc_sso, hwgrp) + SSO_LF_GGRP_AGGR_CTX_BASE);
+	plt_free((void *)sso->agg_mem[hwgrp]);
+	sso->agg_mem[hwgrp] = 0;
+	sso->agg_cnt[hwgrp] = 0;
+	sso->agg_used[hwgrp] = 0;
+
+	mbox = mbox_get(sso->dev.mbox);
+	req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+	if (req == NULL) {
+		mbox_process(mbox);
+		req = mbox_alloc_msg_sso_aggr_setconfig(mbox);
+		if (req == NULL) {
+			plt_err("Failed to allocate AGQ config mbox.");
+			mbox_put(mbox);
+			return;
+		}
+	}
+
+	req->hwgrp = hwgrp;
+	req->npa_pf_func = 0;
+	rc = mbox_process(mbox);
+	if (rc < 0)
+		plt_err("Failed to set HWGRP AGQ config rc=%d", rc);
+	mbox_put(mbox);
+}
+
+uint32_t
+roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag_mask,
+			   uint8_t xqe_type)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	struct sso_agq_ctx *ctx;
+	uint32_t i;
+
+	plt_rmb();
+	ctx = (struct sso_agq_ctx *)sso->agg_mem[hwgrp];
+	for (i = 0; i < sso->agg_used[hwgrp]; i++) {
+		if (!ctx[i].ena)
+			continue;
+		if (ctx[i].tag == tag_mask && ctx[i].xqe_type == xqe_type)
+			return i;
+	}
+
+	return UINT32_MAX;
+}
+
 int
-roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
-			struct roc_sso_hwgrp_stats *stats)
+roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp, struct roc_sso_hwgrp_stats *stats)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
 	struct sso_grp_stats *req_rsp;
@@ -1058,10 +1280,14 @@ void
 roc_sso_rsrc_fini(struct roc_sso *roc_sso)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+	uint32_t cnt;
 
 	if (!roc_sso->nb_hws && !roc_sso->nb_hwgrp)
 		return;
 
+	for (cnt = 0; cnt < roc_sso->nb_hwgrp; cnt++)
+		roc_sso_hwgrp_agq_release(roc_sso, cnt);
+
 	sso_unregister_irqs_priv(roc_sso, sso->pci_dev->intr_handle,
 				 roc_sso->nb_hws, roc_sso->nb_hwgrp);
 	sso_lf_free(&sso->dev, SSO_LF_TYPE_HWS, roc_sso->nb_hws);
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 021db22c86..f73128087a 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -47,6 +47,17 @@ struct roc_sso_xaq_data {
 	void *mem;
 };
 
+struct roc_sso_agq_data {
+	uint8_t tt;
+	uint8_t cnt_ena;
+	uint8_t xqe_type;
+	uint16_t stag;
+	uint32_t tag;
+	uint32_t vwqe_max_sz_exp;
+	uint64_t vwqe_wait_tmo;
+	uint64_t vwqe_aura;
+};
+
 struct roc_sso {
 	struct plt_pci_device *pci_dev;
 	/* Public data. */
@@ -100,6 +111,12 @@ int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso,
 					 uint16_t nb_stash);
 void __roc_api roc_sso_hws_gwc_invalidate(struct roc_sso *roc_sso, uint8_t *hws,
 					  uint8_t nb_hws);
+int __roc_api roc_sso_hwgrp_agq_alloc(struct roc_sso *roc_sso, uint16_t hwgrp,
+				      struct roc_sso_agq_data *data);
+void __roc_api roc_sso_hwgrp_agq_free(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t agq_id);
+void __roc_api roc_sso_hwgrp_agq_release(struct roc_sso *roc_sso, uint16_t hwgrp);
+uint32_t __roc_api roc_sso_hwgrp_agq_from_tag(struct roc_sso *roc_sso, uint16_t hwgrp, uint32_t tag,
+					      uint8_t xqe_type);
 
 /* Utility function */
 uint16_t __roc_api roc_sso_pf_func_get(void);
@@ -107,7 +124,7 @@ uint16_t __roc_api roc_sso_pf_func_get(void);
 /* Debug */
 void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws,
 			    uint16_t hwgrp, FILE *f);
-int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint8_t hwgrp,
+int __roc_api roc_sso_hwgrp_stats_get(struct roc_sso *roc_sso, uint16_t hwgrp,
 				      struct roc_sso_hwgrp_stats *stats);
 int __roc_api roc_sso_hws_stats_get(struct roc_sso *roc_sso, uint8_t hws,
 				    struct roc_sso_hws_stats *stats);
diff --git a/drivers/common/cnxk/roc_sso_priv.h b/drivers/common/cnxk/roc_sso_priv.h
index 21c59c57e6..d6dc6dedd3 100644
--- a/drivers/common/cnxk/roc_sso_priv.h
+++ b/drivers/common/cnxk/roc_sso_priv.h
@@ -13,6 +13,10 @@ struct sso_rsrc {
 struct sso {
 	struct plt_pci_device *pci_dev;
 	struct dev dev;
+	/* EVA memory area */
+	uintptr_t agg_mem[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_used[MAX_RVU_BLKLF_CNT];
+	uint32_t agg_cnt[MAX_RVU_BLKLF_CNT];
 	/* Interrupt handler args. */
 	struct sso_rsrc hws_rsrc[MAX_RVU_BLKLF_CNT];
 	struct sso_rsrc hwgrp_rsrc[MAX_RVU_BLKLF_CNT];
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index efb5e44da8..c2d200f4ad 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -501,6 +501,10 @@ INTERNAL {
 	roc_sso_dev_fini;
 	roc_sso_dev_init;
 	roc_sso_dump;
+	roc_sso_hwgrp_agq_alloc;
+	roc_sso_hwgrp_agq_free;
+	roc_sso_hwgrp_agq_from_tag;
+	roc_sso_hwgrp_agq_release;
 	roc_sso_hwgrp_alloc_xaq;
 	roc_sso_hwgrp_base_get;
 	roc_sso_hwgrp_free_xaq_aura;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 20/22] event/cnxk: add Rx/Tx event vector support
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (17 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 19/22] common/cnxk: add SSO event aggregator pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 21/22] common/cnxk: update timer base code pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Event vector support for CN20K Rx/Tx adapter.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c      | 185 ++++++++++++++++++++++-
 drivers/event/cnxk/cn20k_tx_worker.h     |  84 ++++++++++
 drivers/event/cnxk/cn20k_worker.h        |  63 ++++++++
 drivers/event/cnxk/cnxk_eventdev.h       |   3 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  16 +-
 5 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 5d49a5e5c6..57e15b6d8c 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -75,6 +75,7 @@ cn20k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
 	ws->fc_cache_space = (int64_t __rte_atomic *)dev->fc_cache_space;
 	ws->aw_lmt = dev->sso.lmt_base;
 	ws->gw_wdata = cnxk_sso_hws_prf_wdata(dev);
+	ws->lmt_base = dev->sso.lmt_base;
 
 	/* Set get_work timeout for HWS */
 	val = NSEC2USEC(dev->deq_tmo_ns);
@@ -595,7 +596,8 @@ cn20k_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev,
 	else
 		*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
 			RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+			RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -641,6 +643,156 @@ cn20k_sso_tstamp_hdl_update(uint16_t port_id, uint16_t flags, bool ptp_en)
 	eventdev_fops_tstamp_update(event_dev);
 }
 
+static int
+cn20k_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id, uint16_t port_id,
+		     const struct rte_event_eth_rx_adapter_queue_conf *queue_conf, int agq)
+{
+	struct roc_nix_rq *rq;
+	uint32_t tag_mask;
+	uint16_t wqe_skip;
+	uint8_t tt;
+	int rc;
+
+	rq = &cnxk_eth_dev->rqs[rq_id];
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+		tag_mask = agq;
+		tt = SSO_TT_AGG;
+		rq->flow_tag_width = 0;
+	} else {
+		tag_mask = (port_id & 0xFF) << 20;
+		tag_mask |= (RTE_EVENT_TYPE_ETHDEV << 28);
+		tt = queue_conf->ev.sched_type;
+		rq->flow_tag_width = 20;
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+			rq->flow_tag_width = 0;
+			tag_mask |= queue_conf->ev.flow_id;
+		}
+	}
+
+	rq->tag_mask = tag_mask;
+	rq->sso_ena = 1;
+	rq->tt = tt;
+	rq->hwgrp = queue_conf->ev.queue_id;
+	wqe_skip = RTE_ALIGN_CEIL(sizeof(struct rte_mbuf), ROC_CACHE_LINE_SZ);
+	wqe_skip = wqe_skip / ROC_CACHE_LINE_SZ;
+	rq->wqe_skip = wqe_skip;
+
+	rc = roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+	return rc;
+}
+
+static int
+cn20k_sso_rx_adapter_vwqe_enable(struct cnxk_sso_evdev *dev, uint16_t port_id, uint16_t rq_id,
+				 const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	uint32_t agq, tag_mask, stag_mask;
+	struct roc_sso_agq_data data;
+	int rc;
+
+	tag_mask = (port_id & 0xff) << 20;
+	if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)
+		tag_mask |= queue_conf->ev.flow_id;
+	else
+		tag_mask |= rq_id;
+
+	stag_mask = tag_mask;
+	tag_mask |= RTE_EVENT_TYPE_ETHDEV_VECTOR << 28;
+	stag_mask |= RTE_EVENT_TYPE_ETHDEV << 28;
+
+	memset(&data, 0, sizeof(struct roc_sso_agq_data));
+	data.tag = tag_mask;
+	data.tt = queue_conf->ev.sched_type;
+	data.stag = stag_mask;
+	data.vwqe_aura = roc_npa_aura_handle_to_aura(queue_conf->vector_mp->pool_id);
+	data.vwqe_max_sz_exp = rte_log2_u32(queue_conf->vector_sz);
+	data.vwqe_wait_tmo = queue_conf->vector_timeout_ns / ((SSO_AGGR_DEF_TMO + 1) * 100);
+	data.xqe_type = 0;
+
+	rc = roc_sso_hwgrp_agq_alloc(&dev->sso, queue_conf->ev.queue_id, &data);
+	if (rc < 0)
+		return rc;
+
+	agq = roc_sso_hwgrp_agq_from_tag(&dev->sso, queue_conf->ev.queue_id, tag_mask, 0);
+	return agq;
+}
+
+static int
+cn20k_rx_adapter_queue_add(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id,
+			   const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	uint16_t port = eth_dev->data->port_id;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	int i, rc = 0, agq = 0;
+
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			rc |= cn20k_rx_adapter_queue_add(event_dev, eth_dev, i, queue_conf);
+	} else {
+		rxq_sp = cnxk_eth_rxq_to_sp(eth_dev->data->rx_queues[rx_queue_id]);
+		cnxk_sso_updt_xae_cnt(dev, rxq_sp, RTE_EVENT_TYPE_ETHDEV);
+		rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+		if (queue_conf->rx_queue_flags & RTE_EVENT_ETH_RX_ADAPTER_QUEUE_EVENT_VECTOR) {
+			cnxk_sso_updt_xae_cnt(dev, queue_conf->vector_mp,
+					      RTE_EVENT_TYPE_ETHDEV_VECTOR);
+			rc = cnxk_sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)event_dev);
+			if (rc < 0)
+				return rc;
+
+			rc = cn20k_sso_rx_adapter_vwqe_enable(dev, port, rx_queue_id, queue_conf);
+			if (rc < 0)
+				return rc;
+			agq = rc;
+		}
+
+		rc = cn20k_sso_rxq_enable(cnxk_eth_dev, (uint16_t)rx_queue_id, port, queue_conf,
+					  agq);
+
+		/* Propagate force bp devarg */
+		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
+		cnxk_sso_tstamp_cfg(port, eth_dev, dev);
+		cnxk_eth_dev->nb_rxq_sso++;
+	}
+
+	if (rc < 0) {
+		plt_err("Failed to configure Rx adapter port=%d, q=%d", port,
+			queue_conf->ev.queue_id);
+		return rc;
+	}
+
+	dev->rx_offloads |= cnxk_eth_dev->rx_offload_flags;
+	return 0;
+}
+
+static int
+cn20k_rx_adapter_queue_del(const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+			   int32_t rx_queue_id)
+{
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	struct roc_nix_rq *rxq;
+	int i, rc = 0;
+
+	RTE_SET_USED(event_dev);
+	if (rx_queue_id < 0) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+			cn20k_rx_adapter_queue_del(event_dev, eth_dev, i);
+	} else {
+		rxq = &cnxk_eth_dev->rqs[rx_queue_id];
+		if (rxq->tt == SSO_TT_AGG)
+			roc_sso_hwgrp_agq_free(&dev->sso, rxq->hwgrp, rxq->tag_mask);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
+		cnxk_eth_dev->nb_rxq_sso--;
+	}
+
+	if (rc < 0)
+		plt_err("Failed to clear Rx adapter config port=%d, q=%d", eth_dev->data->port_id,
+			rx_queue_id);
+	return rc;
+}
+
 static int
 cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 			       const struct rte_eth_dev *eth_dev, int32_t rx_queue_id,
@@ -657,7 +809,7 @@ cn20k_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	rc = cnxk_sso_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
+	rc = cn20k_rx_adapter_queue_add(event_dev, eth_dev, rx_queue_id, queue_conf);
 	if (rc)
 		return -EINVAL;
 
@@ -690,7 +842,29 @@ cn20k_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 	if (rc)
 		return -EINVAL;
 
-	return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+	return cn20k_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
+}
+
+static int
+cn20k_sso_rx_adapter_vector_limits(const struct rte_eventdev *dev,
+				   const struct rte_eth_dev *eth_dev,
+				   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+	int ret;
+
+	RTE_SET_USED(dev);
+	RTE_SET_USED(eth_dev);
+	ret = strncmp(eth_dev->device->driver->name, "net_cn20k", 8);
+	if (ret)
+		return -ENOTSUP;
+
+	limits->log2_sz = true;
+	limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+	limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+	limits->min_timeout_ns = (SSO_AGGR_DEF_TMO + 1) * 100;
+	limits->max_timeout_ns = (BITMASK_ULL(11, 0) + 1) * limits->min_timeout_ns;
+
+	return 0;
 }
 
 static int
@@ -704,7 +878,8 @@ cn20k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, const struct rte_e
 	if (ret)
 		*caps = 0;
 	else
-		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+		*caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+			RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
 	return 0;
 }
@@ -807,6 +982,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_rx_adapter_start = cnxk_sso_rx_adapter_start,
 	.eth_rx_adapter_stop = cnxk_sso_rx_adapter_stop,
 
+	.eth_rx_adapter_vector_limits_get = cn20k_sso_rx_adapter_vector_limits,
+
 	.eth_tx_adapter_caps_get = cn20k_sso_tx_adapter_caps_get,
 	.eth_tx_adapter_queue_add = cn20k_sso_tx_adapter_queue_add,
 	.eth_tx_adapter_queue_del = cn20k_sso_tx_adapter_queue_del,
diff --git a/drivers/event/cnxk/cn20k_tx_worker.h b/drivers/event/cnxk/cn20k_tx_worker.h
index c8ab560b0e..b09d845b09 100644
--- a/drivers/event/cnxk/cn20k_tx_worker.h
+++ b/drivers/event/cnxk/cn20k_tx_worker.h
@@ -139,10 +139,58 @@ cn20k_sso_tx_one(struct cn20k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd, ui
 	return 1;
 }
 
+static __rte_always_inline uint16_t
+cn20k_sso_vwqe_split_tx(struct cn20k_sso_hws *ws, struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+			uint64_t *cmd, const uint64_t *txq_data, const uint32_t flags)
+{
+	uint16_t count = 0, port, queue, ret = 0, last_idx = 0;
+	struct cn20k_eth_txq *txq;
+	int32_t space;
+	int i;
+
+	port = mbufs[0]->port;
+	queue = rte_event_eth_tx_adapter_txq_get(mbufs[0]);
+	for (i = 0; i < nb_mbufs; i++) {
+		if (port != mbufs[i]->port || queue != rte_event_eth_tx_adapter_txq_get(mbufs[i])) {
+			if (count) {
+				txq = (struct cn20k_eth_txq
+					       *)(txq_data[(txq_data[port] >> 48) + queue] &
+						  (BIT_ULL(48) - 1));
+				/* Transmit based on queue depth */
+				space = cn20k_sso_sq_depth(txq);
+				if (space < count)
+					goto done;
+				cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx],
+							   count, cmd, flags | NIX_TX_VWQE_F);
+				ret += count;
+				count = 0;
+			}
+			port = mbufs[i]->port;
+			queue = rte_event_eth_tx_adapter_txq_get(mbufs[i]);
+			last_idx = i;
+		}
+		count++;
+	}
+	if (count) {
+		txq = (struct cn20k_eth_txq *)(txq_data[(txq_data[port] >> 48) + queue] &
+					       (BIT_ULL(48) - 1));
+		/* Transmit based on queue depth */
+		space = cn20k_sso_sq_depth(txq);
+		if (space < count)
+			goto done;
+		cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, &mbufs[last_idx], count, cmd,
+					   flags | NIX_TX_VWQE_F);
+		ret += count;
+	}
+done:
+	return ret;
+}
+
 static __rte_always_inline uint16_t
 cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t *cmd,
 		       const uint64_t *txq_data, const uint32_t flags)
 {
+	struct cn20k_eth_txq *txq;
 	struct rte_mbuf *m;
 	uintptr_t lmt_addr;
 	uint16_t lmt_id;
@@ -150,6 +198,42 @@ cn20k_sso_hws_event_tx(struct cn20k_sso_hws *ws, struct rte_event *ev, uint64_t
 	lmt_addr = ws->lmt_base;
 	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
 
+	if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
+		struct rte_mbuf **mbufs = ev->vec->mbufs;
+		uint64_t meta = *(uint64_t *)ev->vec;
+		uint16_t offset, nb_pkts, left;
+		int32_t space;
+
+		nb_pkts = meta & 0xFFFF;
+		offset = (meta >> 16) & 0xFFF;
+		if (meta & BIT(31)) {
+			txq = (struct cn20k_eth_txq
+				       *)(txq_data[(txq_data[meta >> 32] >> 48) + (meta >> 48)] &
+					  (BIT_ULL(48) - 1));
+
+			/* Transmit based on queue depth */
+			space = cn20k_sso_sq_depth(txq);
+			if (space <= 0)
+				return 0;
+			nb_pkts = nb_pkts < space ? nb_pkts : (uint16_t)space;
+			cn20k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs + offset, nb_pkts,
+						   cmd, flags | NIX_TX_VWQE_F);
+		} else {
+			nb_pkts = cn20k_sso_vwqe_split_tx(ws, mbufs + offset, nb_pkts, cmd,
+							  txq_data, flags);
+		}
+		left = (meta & 0xFFFF) - nb_pkts;
+
+		if (!left) {
+			rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+		} else {
+			*(uint64_t *)ev->vec =
+				(meta & ~0xFFFFFFFUL) | (((uint32_t)nb_pkts + offset) << 16) | left;
+		}
+		rte_prefetch0(ws);
+		return !left;
+	}
+
 	m = ev->mbuf;
 	return cn20k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data, flags);
 }
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 9075073fd2..5799e5cc49 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -41,6 +41,58 @@ cn20k_sso_process_tstamp(uint64_t u64, uint64_t mbuf, struct cnxk_timesync_info
 	}
 }
 
+static __rte_always_inline void
+cn20k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, struct cn20k_sso_hws *ws)
+{
+	uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM;
+	struct cnxk_timesync_info *tstamp = ws->tstamp[port_id];
+	void *lookup_mem = ws->lookup_mem;
+	uintptr_t lbase = ws->lmt_base;
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	struct rte_mbuf **wqe;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+	int i;
+
+	mbuf_init |= ((uint64_t)port_id) << 48;
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->mbufs;
+
+	rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+		rte_prefetch0(&vec->ptrs[i]);
+
+	if (flags & NIX_RX_OFFLOAD_TSTAMP_F && tstamp)
+		mbuf_init |= 8;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn20k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem, tstamp, lbase, 0);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+
+		mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf));
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			cn20k_sso_process_tstamp((uint64_t)wqe[0], (uint64_t)mbuf, tstamp);
+		wqe[0] = (struct rte_mbuf *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline void
 cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
 {
@@ -65,6 +117,17 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
 			cn20k_sso_process_tstamp(u64[1], mbuf, ws->tstamp[port]);
 		u64[1] = mbuf;
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+		uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+		__uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+		vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | ((vwqe_hdr & 0xFFFF) << 48) |
+			   ((uint64_t)port << 32);
+		*(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+		cn20k_process_vwqe(u64[1], port, flags, ws);
+		/* Mark vector mempool object as get */
+		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
+					  1);
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index 4066497e6b..33b3538753 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -266,6 +266,9 @@ int cnxk_sso_rx_adapter_start(const struct rte_eventdev *event_dev,
 			      const struct rte_eth_dev *eth_dev);
 int cnxk_sso_rx_adapter_stop(const struct rte_eventdev *event_dev,
 			     const struct rte_eth_dev *eth_dev);
+void cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev,
+			 struct cnxk_sso_evdev *dev);
+int cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id);
 int cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
 				  const struct rte_eth_dev *eth_dev,
 				  int32_t tx_queue_id);
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 3cac42111a..4cf48db74c 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -167,9 +167,10 @@ cnxk_sso_rxq_enable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id,
 	return rc;
 }
 
-static int
-cnxk_sso_rxq_disable(struct cnxk_eth_dev *cnxk_eth_dev, uint16_t rq_id)
+int
+cnxk_sso_rxq_disable(const struct rte_eth_dev *eth_dev, uint16_t rq_id)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
 	struct roc_nix_rq *rq;
 
 	rq = &cnxk_eth_dev->rqs[rq_id];
@@ -209,10 +210,11 @@ cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
 	return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
 }
 
-static void
-cnxk_sso_tstamp_cfg(uint16_t port_id, struct cnxk_eth_dev *cnxk_eth_dev,
-		    struct cnxk_sso_evdev *dev)
+void
+cnxk_sso_tstamp_cfg(uint16_t port_id, const struct rte_eth_dev *eth_dev, struct cnxk_sso_evdev *dev)
 {
+	struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+
 	if (cnxk_eth_dev->rx_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP || cnxk_eth_dev->ptp_en)
 		dev->tstamp[port_id] = &cnxk_eth_dev->tstamp;
 }
@@ -263,7 +265,7 @@ cnxk_sso_rx_adapter_queue_add(
 
 		/* Propagate force bp devarg */
 		cnxk_eth_dev->nix.force_rx_aura_bp = dev->force_ena_bp;
-		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, cnxk_eth_dev, dev);
+		cnxk_sso_tstamp_cfg(eth_dev->data->port_id, eth_dev, dev);
 		cnxk_eth_dev->nb_rxq_sso++;
 	}
 
@@ -290,7 +292,7 @@ cnxk_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev,
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 			cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, i);
 	} else {
-		rc = cnxk_sso_rxq_disable(cnxk_eth_dev, (uint16_t)rx_queue_id);
+		rc = cnxk_sso_rxq_disable(eth_dev, (uint16_t)rx_queue_id);
 		cnxk_eth_dev->nb_rxq_sso--;
 
 		/* Enable drop_re if it was disabled earlier */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 21/22] common/cnxk: update timer base code
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (18 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-28 15:59               ` [PATCH v8 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
  20 siblings, 0 replies; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Update event timer base code to support configuring
HW accelerated timer arm and cancel.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/hw/tim.h        |  5 ++
 drivers/common/cnxk/roc_mbox.h      | 38 ++++++++++++-
 drivers/common/cnxk/roc_tim.c       | 84 ++++++++++++++++++++++++++---
 drivers/common/cnxk/roc_tim.h       | 20 +++++--
 drivers/common/cnxk/version.map     |  1 +
 drivers/event/cnxk/cnxk_tim_evdev.h |  5 --
 6 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 82b094e3dc..75700a11b8 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -47,10 +47,15 @@
 #define TIM_LF_RAS_INT_ENA_W1S	   (0x310)
 #define TIM_LF_RAS_INT_ENA_W1C	   (0x318)
 #define TIM_LF_RING_REL		   (0x400)
+#define TIM_LF_SCHED_TIMER0	   (0x480)
+#define TIM_LF_RING_FIRST_EXPIRY   (0x558)
 
 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
+#define TIM_MAX_INTERVAL_EXT_TICKS ((1ULL << 34) - 1)
 #define TIM_MAX_BUCKET_SIZE    ((1ULL << 20) - 2)
 #define TIM_MIN_BUCKET_SIZE    1
 #define TIM_BUCKET_WRAP_SIZE   3
+#define TIM_BUCKET_MIN_GAP     1
+#define TIM_NPA_TMO            0xFFFF
 
 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 960535eca0..790c203a2c 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -164,6 +164,9 @@ struct mbox_msghdr {
 	  tim_intvl_rsp)                                                       \
 	M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,          \
 	  tim_capture_rsp)                                                     \
+	M(TIM_CONFIG_HWWQE,    0x807, tim_config_hwwqe, tim_cfg_hwwqe_req,     \
+	  msg_rsp)                                                             \
+	M(TIM_GET_HW_INFO,     0x808, tim_get_hw_info, msg_req, tim_hw_info)   \
 	/* CPT mbox IDs (range 0xA00 - 0xBFF) */                               \
 	M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)    \
 	M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)                   \
@@ -2801,6 +2804,7 @@ enum tim_af_status {
 	TIM_AF_INVALID_ENABLE_DONTFREE = -815,
 	TIM_AF_ENA_DONTFRE_NSET_PERIODIC = -816,
 	TIM_AF_RING_ALREADY_DISABLED = -817,
+	TIM_AF_LF_START_SYNC_FAIL = -818,
 };
 
 enum tim_clk_srcs {
@@ -2893,13 +2897,43 @@ struct tim_config_req {
 	uint8_t __io enabledontfreebuffer;
 	uint32_t __io bucketsize;
 	uint32_t __io chunksize;
-	uint32_t __io interval;
+	uint32_t __io interval_lo;
 	uint8_t __io gpioedge;
-	uint8_t __io rsvd[7];
+	uint8_t __io rsvd[3];
+	uint32_t __io interval_hi;
 	uint64_t __io intervalns;
 	uint64_t __io clockfreq;
 };
 
+struct tim_cfg_hwwqe_req {
+	struct mbox_msghdr hdr;
+	uint16_t __io ring;
+	uint8_t __io grp_ena;
+	uint8_t __io hwwqe_ena;
+	uint8_t __io ins_min_gap;
+	uint8_t __io flw_ctrl_ena;
+	uint8_t __io wqe_rd_clr_ena;
+	uint16_t __io grp_tmo_cntr;
+	uint16_t __io npa_tmo_cntr;
+	uint16_t __io result_offset;
+	uint16_t __io event_count_offset;
+	uint64_t __io rsvd[2];
+};
+
+struct tim_feat_info {
+	uint16_t __io rings;
+	uint8_t __io engines;
+	uint8_t __io hwwqe : 1;
+	uint8_t __io intvl_ext : 1;
+	uint8_t __io rsvd8[4];
+	uint64_t __io rsvd[2];
+};
+
+struct tim_hw_info {
+	struct mbox_msghdr hdr;
+	struct tim_feat_info feat;
+};
+
 struct tim_lf_alloc_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __io tenns_clk;
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index 83228fb2b6..e326ea0122 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+#define LF_ENABLE_RETRY_CNT 8
+
 static int
 tim_fill_msix(struct roc_tim *roc_tim, uint16_t nb_ring)
 {
@@ -86,8 +88,11 @@ tim_err_desc(int rc)
 	case TIM_AF_RING_ALREADY_DISABLED:
 		plt_err("Ring already stopped");
 		break;
+	case TIM_AF_LF_START_SYNC_FAIL:
+		plt_err("Ring start sync failed.");
+		break;
 	default:
-		plt_err("Unknown Error.");
+		plt_err("Unknown Error: %d", rc);
 	}
 }
 
@@ -123,10 +128,12 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
 	struct mbox *mbox = mbox_get(dev->mbox);
+	uint8_t retry_cnt = LF_ENABLE_RETRY_CNT;
 	struct tim_enable_rsp *rsp;
 	struct tim_ring_req *req;
 	int rc = -ENOSPC;
 
+retry:
 	req = mbox_alloc_msg_tim_enable_ring(mbox);
 	if (req == NULL)
 		goto fail;
@@ -134,6 +141,9 @@ roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t *start_tsc,
 
 	rc = mbox_process_msg(dev->mbox, (void **)&rsp);
 	if (rc) {
+		if (rc == TIM_AF_LF_START_SYNC_FAIL && retry_cnt--)
+			goto retry;
+
 		tim_err_desc(rc);
 		rc = -EIO;
 		goto fail;
@@ -183,10 +193,9 @@ roc_tim_lf_base_get(struct roc_tim *roc_tim, uint8_t ring_id)
 }
 
 int
-roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-		  enum roc_tim_clk_src clk_src, uint8_t ena_periodic,
-		  uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
-		  uint32_t interval, uint64_t intervalns, uint64_t clockfreq)
+roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id, enum roc_tim_clk_src clk_src,
+		  uint8_t ena_periodic, uint8_t ena_dfb, uint32_t bucket_sz, uint32_t chunk_sz,
+		  uint64_t interval, uint64_t intervalns, uint64_t clockfreq)
 {
 	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
 	struct dev *dev = &sso->dev;
@@ -204,7 +213,8 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	req->clocksource = clk_src;
 	req->enableperiodic = ena_periodic;
 	req->enabledontfreebuffer = ena_dfb;
-	req->interval = interval;
+	req->interval_lo = interval;
+	req->interval_hi = interval >> 32;
 	req->intervalns = intervalns;
 	req->clockfreq = clockfreq;
 	req->gpioedge = TIM_GPIO_LTOH_TRANS;
@@ -220,6 +230,41 @@ roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
 	return rc;
 }
 
+int
+roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id, struct roc_tim_hwwqe_cfg *cfg)
+{
+	struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+	struct dev *dev = &sso->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_cfg_hwwqe_req *req;
+	int rc = -ENOSPC;
+
+	req = mbox_alloc_msg_tim_config_hwwqe(mbox);
+	if (req == NULL)
+		goto fail;
+	req->ring = ring_id;
+	req->hwwqe_ena = cfg->hwwqe_ena;
+	req->grp_ena = cfg->grp_ena;
+	req->grp_tmo_cntr = cfg->grp_tmo_cyc;
+	req->flw_ctrl_ena = cfg->flw_ctrl_ena;
+	req->result_offset = cfg->result_offset;
+	req->event_count_offset = cfg->event_count_offset;
+
+	req->wqe_rd_clr_ena = 1;
+	req->npa_tmo_cntr = TIM_NPA_TMO;
+	req->ins_min_gap = TIM_BUCKET_MIN_GAP;
+
+	rc = mbox_process(mbox);
+	if (rc) {
+		tim_err_desc(rc);
+		rc = -EIO;
+	}
+
+fail:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_lf_interval(struct roc_tim *roc_tim, enum roc_tim_clk_src clk_src,
 		    uint64_t clockfreq, uint64_t *intervalns,
@@ -353,6 +398,31 @@ tim_free_lf_count_get(struct dev *dev, uint16_t *nb_lfs)
 	return 0;
 }
 
+static int
+tim_hw_info_get(struct roc_tim *roc_tim)
+{
+	struct dev *dev = &roc_sso_to_sso_priv(roc_tim->roc_sso)->dev;
+	struct mbox *mbox = mbox_get(dev->mbox);
+	struct tim_hw_info *rsp;
+	int rc;
+
+	mbox_alloc_msg_tim_get_hw_info(mbox);
+	rc = mbox_process_msg(mbox, (void **)&rsp);
+	if (rc && rc != MBOX_MSG_INVALID) {
+		plt_err("Failed to get SSO HW info");
+		rc = -EIO;
+		goto exit;
+	}
+
+	if (rc != MBOX_MSG_INVALID)
+		mbox_memcpy(&roc_tim->feat, &rsp->feat, sizeof(roc_tim->feat));
+
+	rc = 0;
+exit:
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 roc_tim_init(struct roc_tim *roc_tim)
 {
@@ -372,6 +442,8 @@ roc_tim_init(struct roc_tim *roc_tim)
 	PLT_STATIC_ASSERT(sizeof(struct tim) <= TIM_MEM_SZ);
 	nb_lfs = roc_tim->nb_lfs;
 
+	rc = tim_hw_info_get(roc_tim);
+
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
 		plt_tim_dbg("Failed to get TIM resource count");
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index f9a9ad1887..2eb6e6962b 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -19,10 +19,20 @@ enum roc_tim_clk_src {
 	ROC_TIM_CLK_SRC_INVALID,
 };
 
+struct roc_tim_hwwqe_cfg {
+	uint8_t grp_ena;
+	uint8_t hwwqe_ena;
+	uint8_t flw_ctrl_ena;
+	uint16_t grp_tmo_cyc;
+	uint16_t result_offset;
+	uint16_t event_count_offset;
+};
+
 struct roc_tim {
 	struct roc_sso *roc_sso;
 	/* Public data. */
 	uint16_t nb_lfs;
+	struct tim_feat_info feat;
 	/* Private data. */
 #define TIM_MEM_SZ (1 * 1024)
 	uint8_t reserved[TIM_MEM_SZ] __plt_cache_aligned;
@@ -36,11 +46,11 @@ int __roc_api roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id,
 				uint64_t *start_tsc, uint32_t *cur_bkt);
 int __roc_api roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id);
 int __roc_api roc_tim_lf_config(struct roc_tim *roc_tim, uint8_t ring_id,
-				enum roc_tim_clk_src clk_src,
-				uint8_t ena_periodic, uint8_t ena_dfb,
-				uint32_t bucket_sz, uint32_t chunk_sz,
-				uint32_t interval, uint64_t intervalns,
-				uint64_t clockfreq);
+				enum roc_tim_clk_src clk_src, uint8_t ena_periodic, uint8_t ena_dfb,
+				uint32_t bucket_sz, uint32_t chunk_sz, uint64_t interval,
+				uint64_t intervalns, uint64_t clockfreq);
+int __roc_api roc_tim_lf_config_hwwqe(struct roc_tim *roc_tim, uint8_t ring_id,
+				      struct roc_tim_hwwqe_cfg *cfg);
 int __roc_api roc_tim_lf_interval(struct roc_tim *roc_tim,
 				  enum roc_tim_clk_src clk_src,
 				  uint64_t clockfreq, uint64_t *intervalns,
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index c2d200f4ad..bb50eea72e 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -529,6 +529,7 @@ INTERNAL {
 	roc_tim_lf_alloc;
 	roc_tim_lf_base_get;
 	roc_tim_lf_config;
+	roc_tim_lf_config_hwwqe;
 	roc_tim_lf_disable;
 	roc_tim_lf_enable;
 	roc_tim_lf_free;
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index f4c61dfb44..c5b3d67eb8 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -16,11 +16,6 @@
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
 
-#include "hw/tim.h"
-
-#include "roc_model.h"
-#include "roc_tim.h"
-
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
 #define TICK2NSEC(__tck, __freq) (((__tck)*NSECPERSEC) / (__freq))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* [PATCH v8 22/22] event/cnxk: add CN20K timer adapter
  2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
                                 ` (19 preceding siblings ...)
  2024-10-28 15:59               ` [PATCH v8 21/22] common/cnxk: update timer base code pbhagavatula
@ 2024-10-28 15:59               ` pbhagavatula
  2024-10-30 14:31                 ` Jerin Jacob
  20 siblings, 1 reply; 181+ messages in thread
From: pbhagavatula @ 2024-10-28 15:59 UTC (permalink / raw)
  To: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Pavan Nikhilesh, Shijith Thotton
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add event timer adapter support for CN20K platform.
Implement new HWWQE insertion feature supported by CN20K platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_tim.c        |   6 +-
 drivers/event/cnxk/cn20k_eventdev.c  |  16 ++-
 drivers/event/cnxk/cn20k_worker.h    |   6 +
 drivers/event/cnxk/cnxk_tim_evdev.c  |  37 ++++-
 drivers/event/cnxk/cnxk_tim_evdev.h  |  14 ++
 drivers/event/cnxk/cnxk_tim_worker.c |  82 +++++++++--
 drivers/event/cnxk/cnxk_tim_worker.h | 201 +++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index e326ea0122..a1461fedb1 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -409,7 +409,7 @@ tim_hw_info_get(struct roc_tim *roc_tim)
 	mbox_alloc_msg_tim_get_hw_info(mbox);
 	rc = mbox_process_msg(mbox, (void **)&rsp);
 	if (rc && rc != MBOX_MSG_INVALID) {
-		plt_err("Failed to get SSO HW info");
+		plt_err("Failed to get TIM HW info");
 		rc = -EIO;
 		goto exit;
 	}
@@ -443,6 +443,10 @@ roc_tim_init(struct roc_tim *roc_tim)
 	nb_lfs = roc_tim->nb_lfs;
 
 	rc = tim_hw_info_get(roc_tim);
+	if (rc) {
+		plt_tim_dbg("Failed to get TIM HW info");
+		return 0;
+	}
 
 	rc = tim_free_lf_count_get(dev, &nb_free_lfs);
 	if (rc) {
diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index 57e15b6d8c..d68700fc05 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -957,6 +957,13 @@ cn20k_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev,
 	return cn20k_sso_updt_tx_adptr_data(event_dev);
 }
 
+static int
+cn20k_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, uint32_t *caps,
+		   const struct event_timer_adapter_ops **ops)
+{
+	return cnxk_tim_caps_get(evdev, flags, caps, ops, cn20k_sso_set_priv_mem);
+}
+
 static struct eventdev_ops cn20k_sso_dev_ops = {
 	.dev_infos_get = cn20k_sso_info_get,
 	.dev_configure = cn20k_sso_dev_configure,
@@ -991,6 +998,8 @@ static struct eventdev_ops cn20k_sso_dev_ops = {
 	.eth_tx_adapter_stop = cnxk_sso_tx_adapter_stop,
 	.eth_tx_adapter_free = cnxk_sso_tx_adapter_free,
 
+	.timer_adapter_caps_get = cn20k_tim_caps_get,
+
 	.xstats_get = cnxk_sso_xstats_get,
 	.xstats_reset = cnxk_sso_xstats_reset,
 	.xstats_get_names = cnxk_sso_xstats_get_names,
@@ -1068,4 +1077,9 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn20k,
 			      CNXK_SSO_XAE_CNT "=<int>"
 			      CNXK_SSO_GGRP_QOS "=<string>"
 			      CNXK_SSO_STASH "=<string>"
-			      CNXK_SSO_FORCE_BP "=1");
+			      CNXK_SSO_FORCE_BP "=1"
+			      CNXK_TIM_DISABLE_NPA "=1"
+			      CNXK_TIM_CHNK_SLOTS "=<int>"
+			      CNXK_TIM_RINGS_LMT "=<int>"
+			      CNXK_TIM_STATS_ENA "=1"
+			      CNXK_TIM_EXT_CLK "=<string>");
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5799e5cc49..b014e549b9 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -5,6 +5,7 @@
 #ifndef __CN20K_WORKER_H__
 #define __CN20K_WORKER_H__
 
+#include <rte_event_timer_adapter.h>
 #include <rte_eventdev.h>
 
 #include "cn20k_eventdev.h"
@@ -128,6 +129,11 @@ cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32
 		/* Mark vector mempool object as get */
 		RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj((void *)u64[1]), (void **)&u64[1], 1,
 					  1);
+	} else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_TIMER) {
+		struct rte_event_timer *tev = (struct rte_event_timer *)u64[1];
+
+		tev->state = RTE_EVENT_TIMER_NOT_ARMED;
+		u64[1] = tev->ev.u64;
 	}
 }
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.c b/drivers/event/cnxk/cnxk_tim_evdev.c
index 27a4dfb490..994d1d1090 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.c
+++ b/drivers/event/cnxk/cnxk_tim_evdev.c
@@ -78,9 +78,25 @@ cnxk_tim_chnk_pool_create(struct cnxk_tim_ring *tim_ring,
 	return rc;
 }
 
+static int
+cnxk_tim_enable_hwwqe(struct cnxk_tim_evdev *dev, struct cnxk_tim_ring *tim_ring)
+{
+	struct roc_tim_hwwqe_cfg hwwqe_cfg;
+
+	memset(&hwwqe_cfg, 0, sizeof(hwwqe_cfg));
+	hwwqe_cfg.hwwqe_ena = 1;
+	hwwqe_cfg.grp_ena = 0;
+	hwwqe_cfg.flw_ctrl_ena = 0;
+	hwwqe_cfg.result_offset = CNXK_TIM_HWWQE_RES_OFFSET_B;
+
+	tim_ring->lmt_base = dev->tim.roc_sso->lmt_base;
+	return roc_tim_lf_config_hwwqe(&dev->tim, tim_ring->ring_id, &hwwqe_cfg);
+}
+
 static void
 cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 {
+	struct cnxk_tim_evdev *dev = cnxk_tim_priv_get();
 	uint8_t prod_flag = !tim_ring->prod_type_sp;
 
 	/* [STATS] [DFB/FB] [SP][MP]*/
@@ -98,6 +114,16 @@ cnxk_tim_set_fp_ops(struct cnxk_tim_ring *tim_ring)
 #undef FP
 	};
 
+	if (dev == NULL)
+		return;
+
+	if (dev->tim.feat.hwwqe) {
+		cnxk_tim_ops.arm_burst = cnxk_tim_arm_burst_hwwqe;
+		cnxk_tim_ops.arm_tmo_tick_burst = cnxk_tim_arm_tmo_burst_hwwqe;
+		cnxk_tim_ops.cancel_burst = cnxk_tim_timer_cancel_burst_hwwqe;
+		return;
+	}
+
 	cnxk_tim_ops.arm_burst =
 		arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
 	cnxk_tim_ops.arm_tmo_tick_burst =
@@ -224,12 +250,13 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		}
 	}
 
-	if (tim_ring->disable_npa) {
+	if (!dev->tim.feat.hwwqe && tim_ring->disable_npa) {
 		tim_ring->nb_chunks =
 			tim_ring->nb_timers /
 			CNXK_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
 		tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
 	} else {
+		tim_ring->disable_npa = 0;
 		tim_ring->nb_chunks = tim_ring->nb_timers;
 	}
 
@@ -255,6 +282,14 @@ cnxk_tim_ring_create(struct rte_event_timer_adapter *adptr)
 		goto tim_chnk_free;
 	}
 
+	if (dev->tim.feat.hwwqe) {
+		rc = cnxk_tim_enable_hwwqe(dev, tim_ring);
+		if (rc < 0) {
+			plt_err("Failed to enable hwwqe");
+			goto tim_chnk_free;
+		}
+	}
+
 	plt_write64((uint64_t)tim_ring->bkt, tim_ring->base + TIM_LF_RING_BASE);
 	plt_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
diff --git a/drivers/event/cnxk/cnxk_tim_evdev.h b/drivers/event/cnxk/cnxk_tim_evdev.h
index c5b3d67eb8..114a89ee5a 100644
--- a/drivers/event/cnxk/cnxk_tim_evdev.h
+++ b/drivers/event/cnxk/cnxk_tim_evdev.h
@@ -15,6 +15,7 @@
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_reciprocal.h>
+#include <rte_vect.h>
 
 #define NSECPERSEC		 1E9
 #define USECPERSEC		 1E6
@@ -29,6 +30,8 @@
 #define CNXK_TIM_MIN_CHUNK_SLOTS    (0x1)
 #define CNXK_TIM_MAX_CHUNK_SLOTS    (0x1FFE)
 #define CNXK_TIM_MAX_POOL_CACHE_SZ  (16)
+#define CNXK_TIM_HWWQE_RES_OFFSET_B (24)
+#define CNXK_TIM_ENT_PER_LMT	    (7)
 
 #define CN9K_TIM_MIN_TMO_TKS (256)
 
@@ -124,6 +127,7 @@ struct __rte_cache_aligned cnxk_tim_ring {
 	uintptr_t tbase;
 	uint64_t (*tick_fn)(uint64_t tbase);
 	uint64_t ring_start_cyc;
+	uint64_t lmt_base;
 	struct cnxk_tim_bkt *bkt;
 	struct rte_mempool *chunk_pool;
 	struct rte_reciprocal_u64 fast_div;
@@ -310,11 +314,21 @@ TIM_ARM_FASTPATH_MODES
 TIM_ARM_TMO_FASTPATH_MODES
 #undef FP
 
+uint16_t cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers);
+
+uint16_t cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				      struct rte_event_timer **tim, const uint64_t timeout_tick,
+				      const uint16_t nb_timers);
+
 uint16_t
 cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 			    struct rte_event_timer **tim,
 			    const uint16_t nb_timers);
 
+uint16_t cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+					   struct rte_event_timer **tim, const uint16_t nb_timers);
+
 int cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 				 const struct rte_event_timer *evtim, uint64_t *ticks_remaining);
 
diff --git a/drivers/event/cnxk/cnxk_tim_worker.c b/drivers/event/cnxk/cnxk_tim_worker.c
index 5e96f6f188..42d376d375 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.c
+++ b/drivers/event/cnxk/cnxk_tim_worker.c
@@ -32,15 +32,6 @@ cnxk_tim_arm_checks(const struct cnxk_tim_ring *const tim_ring,
 	return -EINVAL;
 }
 
-static inline void
-cnxk_tim_format_event(const struct rte_event_timer *const tim,
-		      struct cnxk_tim_ent *const entry)
-{
-	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 |
-		    (tim->ev.event & 0xFFFFFFFFF);
-	entry->wqe = tim->ev.u64;
-}
-
 static __rte_always_inline uint16_t
 cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 			 struct rte_event_timer **tim, const uint16_t nb_timers,
@@ -77,6 +68,24 @@ cnxk_tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_arm_burst_hwwqe(const struct rte_event_timer_adapter *adptr, struct rte_event_timer **tim,
+			 const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t index;
+
+	for (index = 0; index < nb_timers; index++) {
+		if (cnxk_tim_arm_checks(tim_ring, tim[index]))
+			break;
+
+		if (cnxk_tim_add_entry_hwwqe(tim_ring, tim[index]))
+			break;
+	}
+
+	return index;
+}
+
 #define FP(_name, _f3, _f2, _f1, _flags)                                       \
 	uint16_t __rte_noinline cnxk_tim_arm_burst_##_name(                    \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -132,6 +141,29 @@ cnxk_tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
 	return set_timers;
 }
 
+uint16_t
+cnxk_tim_arm_tmo_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+			     struct rte_event_timer **tim, const uint64_t timeout_tick,
+			     const uint16_t nb_timers)
+{
+	struct cnxk_tim_ring *tim_ring = adptr->data->adapter_priv;
+	uint16_t idx;
+
+	if (unlikely(!timeout_tick || timeout_tick > tim_ring->nb_bkts)) {
+		const enum rte_event_timer_state state = timeout_tick ?
+								 RTE_EVENT_TIMER_ERROR_TOOLATE :
+								 RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		for (idx = 0; idx < nb_timers; idx++)
+			tim[idx]->state = state;
+
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return cnxk_tim_add_entry_tmo_hwwqe(tim_ring, tim, timeout_tick * tim_ring->tck_int,
+					    nb_timers);
+}
+
 #define FP(_name, _f2, _f1, _flags)                                            \
 	uint16_t __rte_noinline cnxk_tim_arm_tmo_tick_burst_##_name(           \
 		const struct rte_event_timer_adapter *adptr,                   \
@@ -174,6 +206,38 @@ cnxk_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr,
 	return index;
 }
 
+uint16_t
+cnxk_tim_timer_cancel_burst_hwwqe(const struct rte_event_timer_adapter *adptr,
+				  struct rte_event_timer **tim, const uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t i;
+
+	RTE_SET_USED(adptr);
+	for (i = 0; i < nb_timers; i++) {
+		if (tim[i]->state == RTE_EVENT_TIMER_CANCELED) {
+			rte_errno = EALREADY;
+			break;
+		}
+
+		if (tim[i]->state != RTE_EVENT_TIMER_ARMED) {
+			rte_errno = EINVAL;
+			break;
+		}
+
+		status = (uint64_t __rte_atomic *)&tim[i]->impl_opaque[1];
+		if (!rte_atomic_compare_exchange_strong_explicit(status, (uint64_t *)&tim[i], 0,
+								 rte_memory_order_release,
+								 rte_memory_order_relaxed)) {
+			rte_errno = ENOENT;
+			break;
+		}
+		tim[i]->state = RTE_EVENT_TIMER_CANCELED;
+	}
+
+	return i;
+}
+
 int
 cnxk_tim_remaining_ticks_get(const struct rte_event_timer_adapter *adapter,
 			     const struct rte_event_timer *evtim, uint64_t *ticks_remaining)
diff --git a/drivers/event/cnxk/cnxk_tim_worker.h b/drivers/event/cnxk/cnxk_tim_worker.h
index e52eadbc08..be6744db51 100644
--- a/drivers/event/cnxk/cnxk_tim_worker.h
+++ b/drivers/event/cnxk/cnxk_tim_worker.h
@@ -132,6 +132,13 @@ cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
 	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
 }
 
+static inline void
+cnxk_tim_format_event(const struct rte_event_timer *const tim, struct cnxk_tim_ent *const entry)
+{
+	entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	entry->wqe = tim->ev.u64;
+}
+
 static __rte_always_inline void
 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
 			   const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
@@ -573,6 +580,200 @@ cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
 	return nb_timers;
 }
 
+static int
+cnxk_tim_add_entry_hwwqe(struct cnxk_tim_ring *const tim_ring, struct rte_event_timer *const tim)
+{
+	uint64_t __rte_atomic *status;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+	int rc = 0;
+
+	status = (uint64_t __rte_atomic *)&tim->impl_opaque[0];
+	status[0] = 0;
+	status[1] = 0;
+
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	lmt = (uint64_t *)lmt_addr;
+
+	lmt[0] = tim->timeout_ticks * tim_ring->tck_int;
+	lmt[1] = 0x1;
+	lmt[2] = (tim->ev.event & 0xFFC000000000) >> 6 | (tim->ev.event & 0xFFFFFFFFF);
+	lmt[3] = (uint64_t)tim;
+
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (1UL << 4);
+	roc_lmt_submit_steorl(wdata, pa);
+
+	do {
+		rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+		rsp &= 0xF0UL;
+	} while (!rsp);
+
+	rsp >>= 4;
+	switch (rsp) {
+	case 0x3:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+		rc = !rc;
+		break;
+	case 0x4:
+		tim->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+		rc = !rc;
+		break;
+	case 0x1:
+		tim->state = RTE_EVENT_TIMER_ARMED;
+		break;
+	default:
+		tim->state = RTE_EVENT_TIMER_ERROR;
+		rc = !rc;
+		break;
+	}
+
+	return rc;
+}
+
+static int
+cnxk_tim_add_entry_tmo_hwwqe(struct cnxk_tim_ring *const tim_ring,
+			     struct rte_event_timer **const tim, uint64_t intvl, uint16_t nb_timers)
+{
+	uint64_t __rte_atomic *status;
+	uint16_t cnt, i, j, done;
+	uint64_t wdata, pa;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uint64_t *lmt;
+	uint64_t rsp;
+
+	/* We have 32 LMTLINES per core, but use only 1 line as we need to check status */
+	lmt_addr = tim_ring->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+
+	done = 0;
+	lmt = (uint64_t *)lmt_addr;
+	/* We can do upto 7 timers per LMTLINE */
+	cnt = nb_timers / CNXK_TIM_ENT_PER_LMT;
+
+	lmt[0] = intvl;
+	lmt[1] = 0x1; /* Always relative */
+	/* One LMT line is used, CNTM1 is 0 and SIZE_VEC is not included. */
+	wdata = lmt_id;
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)(CNXK_TIM_ENT_PER_LMT << 4);
+	for (i = 0; i < cnt; i++) {
+		status = (uint64_t __rte_atomic *)&tim[i * CNXK_TIM_ENT_PER_LMT]->impl_opaque[0];
+
+		for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+			cnxk_tim_format_event(tim[(i * CNXK_TIM_ENT_PER_LMT) + j],
+					      (struct cnxk_tim_ent *)&lmt[(j << 1) + 2]);
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[0] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->impl_opaque[1] = 0;
+			tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += CNXK_TIM_ENT_PER_LMT;
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x4:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++)
+					tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+						RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= CNXK_TIM_ENT_PER_LMT;
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < CNXK_TIM_ENT_PER_LMT; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic
+							      *)&tim[(i * CNXK_TIM_ENT_PER_LMT) + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[(i * CNXK_TIM_ENT_PER_LMT) + j]->state =
+							RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+			goto done;
+		}
+	}
+
+	/* SIZEM1 is 0 */
+	pa = (tim_ring->tbase & ~0xFF) + TIM_LF_SCHED_TIMER0;
+	pa |= (uint64_t)((nb_timers - cnt) << 4);
+	if (nb_timers - cnt) {
+		status = (uint64_t __rte_atomic *)&tim[cnt]->impl_opaque[0];
+
+		for (i = 0; i < nb_timers - cnt; i++) {
+			cnxk_tim_format_event(tim[cnt + i],
+					      (struct cnxk_tim_ent *)&lmt[(i << 1) + 2]);
+			tim[cnt + i]->impl_opaque[0] = 0;
+			tim[cnt + i]->impl_opaque[1] = 0;
+			tim[cnt + i]->state = RTE_EVENT_TIMER_ARMED;
+		}
+
+		roc_lmt_submit_steorl(wdata, pa);
+		do {
+			rsp = rte_atomic_load_explicit(status, rte_memory_order_relaxed);
+			rsp &= 0xFUL;
+		} while (!rsp);
+
+		done += (nb_timers - cnt);
+		rsp &= 0xF;
+		if (rsp != 0x1) {
+			switch (rsp) {
+			case 0x3:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x4:
+				for (j = 0; j < nb_timers - cnt; j++)
+					tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+				done -= (nb_timers - cnt);
+				break;
+			case 0x2:
+			default:
+				for (j = 0; j < nb_timers - cnt; j++) {
+					if ((rte_atomic_load_explicit(
+						     (uint64_t __rte_atomic *)&tim[cnt + j]
+							     ->impl_opaque[0],
+						     rte_memory_order_relaxed) &
+					     0xF0) != 0x10) {
+						tim[cnt + j]->state = RTE_EVENT_TIMER_ERROR;
+						done--;
+					}
+				}
+				break;
+			}
+		}
+	}
+
+done:
+	return done;
+}
+
 static int
 cnxk_tim_rm_entry(struct rte_event_timer *tim)
 {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 181+ messages in thread

* Re: [PATCH v8 22/22] event/cnxk: add CN20K timer adapter
  2024-10-28 15:59               ` [PATCH v8 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
@ 2024-10-30 14:31                 ` Jerin Jacob
  0 siblings, 0 replies; 181+ messages in thread
From: Jerin Jacob @ 2024-10-30 14:31 UTC (permalink / raw)
  To: pbhagavatula
  Cc: jerinj, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Shijith Thotton, dev

On Mon, Oct 28, 2024 at 10:50 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add event timer adapter support for CN20K platform.
> Implement new HWWQE insertion feature supported by CN20K platform.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>


Series applied to dpdk-next-eventdev/for-main. Thanks.

^ permalink raw reply	[flat|nested] 181+ messages in thread

end of thread, other threads:[~2024-10-30 14:32 UTC | newest]

Thread overview: 181+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-03 13:22 [PATCH 01/20] common/cnxk: implement SSO HW info pbhagavatula
2024-10-03 13:22 ` [PATCH 02/20] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-15 16:17   ` Stephen Hemminger
2024-10-03 13:22 ` [PATCH 03/20] event/cnxk: add CN20K device config pbhagavatula
2024-10-03 13:22 ` [PATCH 04/20] event/cnxk: add CN20k event queue config pbhagavatula
2024-10-03 13:22 ` [PATCH 05/20] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-03 13:22 ` [PATCH 06/20] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-22  1:46   ` Stephen Hemminger
2024-10-03 13:22 ` [PATCH 07/20] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-22  1:49   ` Stephen Hemminger
2024-10-22  8:54     ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
2024-10-03 13:22 ` [PATCH 08/20] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-03 13:22 ` [PATCH 09/20] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-03 13:22 ` [PATCH 10/20] event/cnxk: add CN20K device start pbhagavatula
2024-10-03 13:22 ` [PATCH 11/20] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-03 13:22 ` [PATCH 12/20] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-03 13:22 ` [PATCH 13/20] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-03 13:22 ` [PATCH 14/20] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-03 13:22 ` [PATCH 15/20] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-03 13:22 ` [PATCH 16/20] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-03 13:22 ` [PATCH 17/20] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-03 13:22 ` [PATCH 18/20] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-03 13:22 ` [PATCH 19/20] common/cnxk: update timer base code pbhagavatula
2024-10-03 13:22 ` [PATCH 20/20] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-21 20:57 ` [PATCH v2 01/21] common/cnxk: implement SSO HW info pbhagavatula
2024-10-21 20:57   ` [PATCH v2 02/21] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-21 20:57   ` [PATCH v2 03/21] event/cnxk: add CN20K device config pbhagavatula
2024-10-21 20:57   ` [PATCH v2 04/21] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-21 20:57   ` [PATCH v2 05/21] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-21 20:57   ` [PATCH v2 06/21] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-21 20:57   ` [PATCH v2 07/21] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-21 20:57   ` [PATCH v2 08/21] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-21 20:57   ` [PATCH v2 09/21] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-21 20:57   ` [PATCH v2 10/21] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-21 20:57   ` [PATCH v2 11/21] event/cnxk: add CN20K device start pbhagavatula
2024-10-21 20:57   ` [PATCH v2 12/21] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-21 20:57   ` [PATCH v2 13/21] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-21 20:57   ` [PATCH v2 14/21] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-21 20:57   ` [PATCH v2 15/21] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-21 20:57   ` [PATCH v2 16/21] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-21 20:57   ` [PATCH v2 17/21] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-21 20:57   ` [PATCH v2 18/21] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-21 20:57   ` [PATCH v2 19/21] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-21 20:57   ` [PATCH v2 20/21] common/cnxk: update timer base code pbhagavatula
2024-10-21 20:57   ` [PATCH v2 21/21] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-22  8:46   ` [PATCH v3 01/22] event/cnxk: use stdatomic API pbhagavatula
2024-10-22  8:46     ` [PATCH v3 02/22] common/cnxk: implement SSO HW info pbhagavatula
2024-10-22  8:46     ` [PATCH v3 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-22  8:46     ` [PATCH v3 04/22] event/cnxk: add CN20K device config pbhagavatula
2024-10-22  8:46     ` [PATCH v3 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-22  8:46     ` [PATCH v3 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-22  8:46     ` [PATCH v3 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-22  8:46     ` [PATCH v3 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-22  8:46     ` [PATCH v3 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-22  8:46     ` [PATCH v3 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-22  8:46     ` [PATCH v3 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-22  8:46     ` [PATCH v3 12/22] event/cnxk: add CN20K device start pbhagavatula
2024-10-22  8:46     ` [PATCH v3 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-22  8:46     ` [PATCH v3 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-22  8:46     ` [PATCH v3 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-22  8:46     ` [PATCH v3 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-22  8:46     ` [PATCH v3 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-22  8:46     ` [PATCH v3 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-22  8:46     ` [PATCH v3 19/22] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-22  8:46     ` [PATCH v3 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-22  8:46     ` [PATCH v3 21/22] common/cnxk: update timer base code pbhagavatula
2024-10-22  8:46     ` [PATCH v3 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-22 19:34     ` [PATCH v4 01/22] event/cnxk: use stdatomic API pbhagavatula
2024-10-22 19:34       ` [PATCH v4 02/22] common/cnxk: implement SSO HW info pbhagavatula
2024-10-22 19:34       ` [PATCH v4 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-22 19:34       ` [PATCH v4 04/22] event/cnxk: add CN20K device config pbhagavatula
2024-10-22 19:34       ` [PATCH v4 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-22 19:34       ` [PATCH v4 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-22 19:34       ` [PATCH v4 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-22 19:34       ` [PATCH v4 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-22 19:34       ` [PATCH v4 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-22 19:34       ` [PATCH v4 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-22 19:34       ` [PATCH v4 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-22 19:34       ` [PATCH v4 12/22] event/cnxk: add CN20K device start pbhagavatula
2024-10-22 19:34       ` [PATCH v4 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-22 19:34       ` [PATCH v4 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-22 19:34       ` [PATCH v4 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-22 19:34       ` [PATCH v4 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-22 19:35       ` [PATCH v4 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-22 19:35       ` [PATCH v4 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-22 19:35       ` [PATCH v4 19/22] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-22 19:35       ` [PATCH v4 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-22 19:35       ` [PATCH v4 21/22] common/cnxk: update timer base code pbhagavatula
2024-10-22 19:35       ` [PATCH v4 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-25  6:37       ` [PATCH v4 01/22] event/cnxk: use stdatomic API Jerin Jacob
2024-10-25  8:13       ` [PATCH v5 " pbhagavatula
2024-10-25  8:13         ` [PATCH v5 02/22] common/cnxk: implement SSO HW info pbhagavatula
2024-10-25  8:13         ` [PATCH v5 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-25  8:13         ` [PATCH v5 04/22] event/cnxk: add CN20K device config pbhagavatula
2024-10-25  8:13         ` [PATCH v5 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-25  8:13         ` [PATCH v5 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-25  8:13         ` [PATCH v5 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-25  8:13         ` [PATCH v5 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-25  8:13         ` [PATCH v5 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-25  8:13         ` [PATCH v5 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-25  8:13         ` [PATCH v5 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-25  8:13         ` [PATCH v5 12/22] event/cnxk: add CN20K device start pbhagavatula
2024-10-25  8:13         ` [PATCH v5 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-25  8:13         ` [PATCH v5 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-25  8:13         ` [PATCH v5 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-25  8:13         ` [PATCH v5 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-25  8:13         ` [PATCH v5 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-25  8:13         ` [PATCH v5 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-25  8:13         ` [PATCH v5 19/22] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-25  8:13         ` [PATCH v5 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-25  8:13         ` [PATCH v5 21/22] common/cnxk: update timer base code pbhagavatula
2024-10-25  8:13         ` [PATCH v5 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-25 12:29         ` [PATCH v6 01/22] event/cnxk: use stdatomic API pbhagavatula
2024-10-25 12:29           ` [PATCH v6 02/22] common/cnxk: implement SSO HW info pbhagavatula
2024-10-25 12:29           ` [PATCH v6 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-25 12:29           ` [PATCH v6 04/22] event/cnxk: add CN20K device config pbhagavatula
2024-10-25 12:29           ` [PATCH v6 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-25 12:29           ` [PATCH v6 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-25 12:29           ` [PATCH v6 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-25 12:29           ` [PATCH v6 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-25 12:29           ` [PATCH v6 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-25 12:29           ` [PATCH v6 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-25 12:29           ` [PATCH v6 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-25 12:29           ` [PATCH v6 12/22] event/cnxk: add CN20K device start pbhagavatula
2024-10-25 12:29           ` [PATCH v6 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-25 12:29           ` [PATCH v6 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-25 12:29           ` [PATCH v6 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-25 12:29           ` [PATCH v6 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-25 12:29           ` [PATCH v6 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-25 12:29           ` [PATCH v6 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-25 12:29           ` [PATCH v6 19/22] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-25 12:29           ` [PATCH v6 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-25 12:29           ` [PATCH v6 21/22] common/cnxk: update timer base code pbhagavatula
2024-10-25 12:29           ` [PATCH v6 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-25 13:03           ` [PATCH v7 01/22] event/cnxk: use stdatomic API pbhagavatula
2024-10-25 13:03             ` [PATCH v7 02/22] common/cnxk: implement SSO HW info pbhagavatula
2024-10-25 13:03             ` [PATCH v7 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-25 13:03             ` [PATCH v7 04/22] event/cnxk: add CN20K device config pbhagavatula
2024-10-25 13:03             ` [PATCH v7 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-25 13:03             ` [PATCH v7 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-25 13:03             ` [PATCH v7 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-25 13:03             ` [PATCH v7 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-25 13:03             ` [PATCH v7 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-25 13:03             ` [PATCH v7 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-25 13:03             ` [PATCH v7 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-25 13:03             ` [PATCH v7 12/22] event/cnxk: add CN20K device start pbhagavatula
2024-10-25 13:03             ` [PATCH v7 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-25 13:03             ` [PATCH v7 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-25 13:03             ` [PATCH v7 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-25 13:03             ` [PATCH v7 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-25 13:03             ` [PATCH v7 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-25 13:03             ` [PATCH v7 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-25 13:03             ` [PATCH v7 19/22] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-25 13:03             ` [PATCH v7 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-25 13:03             ` [PATCH v7 21/22] common/cnxk: update timer base code pbhagavatula
2024-10-25 13:03             ` [PATCH v7 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-28 15:59             ` [PATCH v8 01/22] event/cnxk: use stdatomic API pbhagavatula
2024-10-28 15:59               ` [PATCH v8 02/22] common/cnxk: implement SSO HW info pbhagavatula
2024-10-28 15:59               ` [PATCH v8 03/22] event/cnxk: add CN20K specific device probe pbhagavatula
2024-10-28 15:59               ` [PATCH v8 04/22] event/cnxk: add CN20K device config pbhagavatula
2024-10-28 15:59               ` [PATCH v8 05/22] event/cnxk: add CN20k event queue configuration pbhagavatula
2024-10-28 15:59               ` [PATCH v8 06/22] event/cnxk: add CN20K event port configuration pbhagavatula
2024-10-28 15:59               ` [PATCH v8 07/22] event/cnxk: add CN20K SSO enqueue fast path pbhagavatula
2024-10-28 15:59               ` [PATCH v8 08/22] event/cnxk: add CN20K SSO dequeue " pbhagavatula
2024-10-28 15:59               ` [PATCH v8 09/22] event/cnxk: add CN20K event port quiesce pbhagavatula
2024-10-28 15:59               ` [PATCH v8 10/22] event/cnxk: add CN20K event port profile switch pbhagavatula
2024-10-28 15:59               ` [PATCH v8 11/22] event/cnxk: add CN20K event port preschedule pbhagavatula
2024-10-28 15:59               ` [PATCH v8 12/22] event/cnxk: add CN20K device start pbhagavatula
2024-10-28 15:59               ` [PATCH v8 13/22] event/cnxk: add CN20K device stop and close pbhagavatula
2024-10-28 15:59               ` [PATCH v8 14/22] event/cnxk: add CN20K xstats, selftest and dump pbhagavatula
2024-10-28 15:59               ` [PATCH v8 15/22] event/cnxk: support CN20K Rx adapter pbhagavatula
2024-10-28 15:59               ` [PATCH v8 16/22] event/cnxk: support CN20K Rx adapter fast path pbhagavatula
2024-10-28 15:59               ` [PATCH v8 17/22] event/cnxk: support CN20K Tx adapter pbhagavatula
2024-10-28 15:59               ` [PATCH v8 18/22] event/cnxk: support CN20K Tx adapter fast path pbhagavatula
2024-10-28 15:59               ` [PATCH v8 19/22] common/cnxk: add SSO event aggregator pbhagavatula
2024-10-28 15:59               ` [PATCH v8 20/22] event/cnxk: add Rx/Tx event vector support pbhagavatula
2024-10-28 15:59               ` [PATCH v8 21/22] common/cnxk: update timer base code pbhagavatula
2024-10-28 15:59               ` [PATCH v8 22/22] event/cnxk: add CN20K timer adapter pbhagavatula
2024-10-30 14:31                 ` Jerin Jacob
2024-10-22  1:52 ` [PATCH 01/20] common/cnxk: implement SSO HW info Stephen Hemminger
2024-10-22  8:53   ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).