DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality
@ 2020-11-22 21:18 pbhagavatula
  2021-01-12  8:39 ` [dpdk-dev] [PATCH v2] " pbhagavatula
  2021-01-26  9:57 ` [dpdk-dev] [PATCH] " Jerin Jacob
  0 siblings, 2 replies; 3+ messages in thread
From: pbhagavatula @ 2020-11-22 21:18 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Anoob Joseph, Nithin Dabilpuram, Kiran Kumar K
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Enhance Tx path cache locality and remove the current tag type and group
stores from the datapath to conserve store buffers.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/octeontx2/otx2_evdev.c       | 15 ++----
 drivers/event/octeontx2/otx2_evdev.h       | 24 ++++-----
 drivers/event/octeontx2/otx2_worker.c      | 42 +++++----------
 drivers/event/octeontx2/otx2_worker.h      | 32 +++++------
 drivers/event/octeontx2/otx2_worker_dual.c | 63 ++++++++++------------
 drivers/event/octeontx2/otx2_worker_dual.h |  2 -
 drivers/net/octeontx2/otx2_ethdev_sec_tx.h |  9 ++--
 7 files changed, 74 insertions(+), 113 deletions(-)

diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 0fe014c24..14f16a68f 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
 		ws->port = i;
 		base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
 		sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
+		ws->base[0] = base;
 		vws++;
 
 		base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
 		sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
+		ws->base[1] = base;
 		vws++;
 
 		gws_cookie = ssogws_get_cookie(ws);
@@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
 		ws->port = i;
 		base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
 		sso_set_port_ops(ws, base);
+		ws->base = base;
 
 		gws_cookie = ssogws_get_cookie(ws);
 		gws_cookie->event_dev = event_dev;
@@ -1447,20 +1450,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
 			ws = event_dev->data->ports[i];
 			ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]);
 			ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
-			ws->swtag_req = 0;
 			ws->vws = 0;
-			ws->ws_state[0].cur_grp = 0;
-			ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
-			ws->ws_state[1].cur_grp = 0;
-			ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
 		} else {
 			struct otx2_ssogws *ws;
 
 			ws = event_dev->data->ports[i];
 			ssogws_reset(ws);
-			ws->swtag_req = 0;
-			ws->cur_grp = 0;
-			ws->cur_tt = SSO_SYNC_EMPTY;
 		}
 	}
 
@@ -1479,8 +1474,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
 			otx2_write64(enable, ws->grps_base[i] +
 				     SSO_LF_GGRP_QCTL);
 		}
-		ws->ws_state[0].cur_grp = 0;
-		ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
 	} else {
 		struct otx2_ssogws *ws = event_dev->data->ports[0];
 
@@ -1492,8 +1485,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
 			otx2_write64(enable, ws->grps_base[i] +
 				     SSO_LF_GGRP_QCTL);
 		}
-		ws->cur_grp = 0;
-		ws->cur_tt = SSO_SYNC_EMPTY;
 	}
 
 	/* reset SSO GWS cache */
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 0513cb81c..e381b9e52 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -80,6 +80,8 @@
 
 #define OTX2_SSOW_GET_BASE_ADDR(_GW)        ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
 #define OTX2_SSOW_TT_FROM_TAG(x)	    (((x) >> 32) & SSO_TT_EMPTY)
+#define OTX2_SSOW_GRP_FROM_TAG(x)	    (((x) >> 36) & 0x3ff)
+#define OTX2_SSOW_SWTAG_PEND(x)		    ((x) & BIT_ULL(62))
 
 #define NSEC2USEC(__ns)			((__ns) / 1E3)
 #define USEC2NSEC(__us)                 ((__us) * 1E3)
@@ -169,25 +171,23 @@ struct otx2_sso_evdev {
 	uintptr_t wqp_op;                                                      \
 	uintptr_t swtag_flush_op;                                              \
 	uintptr_t swtag_norm_op;                                               \
-	uintptr_t swtag_desched_op;                                            \
-	uint8_t cur_tt;                                                        \
-	uint8_t cur_grp
+	uintptr_t swtag_desched_op;
 
 /* Event port aka GWS */
 struct otx2_ssogws {
 	/* Get Work Fastpath data */
 	OTX2_SSOGWS_OPS;
-	uint8_t swtag_req;
+	/* PTP timestamp */
+	struct otx2_timesync_info *tstamp;
 	void *lookup_mem;
 	uint8_t port;
 	/* Add Work Fastpath data */
 	uint64_t xaq_lmt __rte_cache_aligned;
 	uint64_t *fc_mem;
 	uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
-	/* PTP timestamp */
-	struct otx2_timesync_info *tstamp;
 	/* Tx Fastpath data */
-	uint8_t tx_adptr_data[] __rte_cache_aligned;
+	uint64_t base __rte_cache_aligned;
+	uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
 struct otx2_ssogws_state {
@@ -197,18 +197,18 @@ struct otx2_ssogws_state {
 struct otx2_ssogws_dual {
 	/* Get Work Fastpath data */
 	struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
-	uint8_t swtag_req;
-	uint8_t vws; /* Ping pong bit */
+	/* PTP timestamp */
+	struct otx2_timesync_info *tstamp;
 	void *lookup_mem;
+	uint8_t vws; /* Ping pong bit */
 	uint8_t port;
 	/* Add Work Fastpath data */
 	uint64_t xaq_lmt __rte_cache_aligned;
 	uint64_t *fc_mem;
 	uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
-	/* PTP timestamp */
-	struct otx2_timesync_info *tstamp;
 	/* Tx Fastpath data */
-	uint8_t tx_adptr_data[] __rte_cache_aligned;
+	uint64_t base[2] __rte_cache_aligned;
+	uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
 static inline struct otx2_sso_evdev *
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index b098407e0..7ed836c1e 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
 {
 	const uint32_t tag = (uint32_t)ev->event;
 	const uint8_t new_tt = ev->sched_type;
-	const uint8_t cur_tt = ws->cur_tt;
+	const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
 
 	/* 96XX model
 	 * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -41,8 +41,6 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
 	} else {
 		otx2_ssogws_swtag_norm(ws, tag, new_tt);
 	}
-
-	ws->swtag_req = 1;
 }
 
 static __rte_always_inline void
@@ -64,7 +62,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
 	const uint8_t grp = ev->queue_id;
 
 	/* Group hasn't changed, Use SWTAG to forward the event */
-	if (ws->cur_grp == grp)
+	if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
 		otx2_ssogws_fwd_swtag(ws, ev);
 	else
 	/*
@@ -75,12 +73,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
 		otx2_ssogws_fwd_group(ws, ev, grp);
 }
 
-static __rte_always_inline void
-otx2_ssogws_release_event(struct otx2_ssogws *ws)
-{
-	otx2_ssogws_swtag_flush(ws);
-}
-
 #define R(name, f6, f5, f4, f3, f2, f1, f0, flags)			\
 uint16_t __rte_hot								\
 otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev,		\
@@ -90,8 +82,7 @@ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev,		\
 									\
 	RTE_SET_USED(timeout_ticks);					\
 									\
-	if (ws->swtag_req) {						\
-		ws->swtag_req = 0;					\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {		\
 		otx2_ssogws_swtag_wait(ws);				\
 		return 1;						\
 	}								\
@@ -117,8 +108,7 @@ otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev,	\
 	uint16_t ret = 1;						\
 	uint64_t iter;							\
 									\
-	if (ws->swtag_req) {						\
-		ws->swtag_req = 0;					\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {		\
 		otx2_ssogws_swtag_wait(ws);				\
 		return ret;						\
 	}								\
@@ -149,8 +139,7 @@ otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev,		\
 									\
 	RTE_SET_USED(timeout_ticks);					\
 									\
-	if (ws->swtag_req) {						\
-		ws->swtag_req = 0;					\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {		\
 		otx2_ssogws_swtag_wait(ws);				\
 		return 1;						\
 	}								\
@@ -177,8 +166,7 @@ otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev,	\
 	uint16_t ret = 1;						\
 	uint64_t iter;							\
 									\
-	if (ws->swtag_req) {						\
-		ws->swtag_req = 0;					\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {		\
 		otx2_ssogws_swtag_wait(ws);				\
 		return ret;						\
 	}								\
@@ -221,7 +209,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
 		otx2_ssogws_forward_event(ws, ev);
 		break;
 	case RTE_EVENT_OP_RELEASE:
-		otx2_ssogws_release_event(ws);
+		otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
 		break;
 	default:
 		return 0;
@@ -274,14 +262,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 {									\
 	struct otx2_ssogws *ws = port;					\
 	uint64_t cmd[sz];						\
-	int i;								\
 									\
-	for (i = 0; i < nb_events; i++)					\
-		otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t	\
+	RTE_SET_USED(nb_events);					\
+	return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,		\
+				    (const uint64_t			\
 				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
 				    &ws->tx_adptr_data,			\
 				    flags);				\
-	return nb_events;						\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -293,14 +280,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 {									\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 	struct otx2_ssogws *ws = port;					\
-	int i;								\
 									\
-	for (i = 0; i < nb_events; i++)					\
-		otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t	\
+	RTE_SET_USED(nb_events);					\
+	return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,		\
+				    (const uint64_t			\
 				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
 				    &ws->tx_adptr_data,			\
 				    (flags) | NIX_TX_MULTI_SEG_F);	\
-	return nb_events;						\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -335,7 +321,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
 		if (fn != NULL && ev.u64 != 0)
 			fn(arg, ev);
 		if (ev.sched_type != SSO_TT_EMPTY)
-			otx2_ssogws_swtag_flush(ws);
+			otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
 		rte_mb();
 		aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
 		ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 0a7d6671c..2b716c042 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
 	event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
 		(event.get_work0 & (0x3FFull << 36)) << 4 |
 		(event.get_work0 & 0xffffffff);
-	ws->cur_tt = event.sched_type;
-	ws->cur_grp = event.queue_id;
 
 	if (event.sched_type != SSO_TT_EMPTY) {
 		if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
@@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
 	event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
 		(event.get_work0 & (0x3FFull << 36)) << 4 |
 		(event.get_work0 & 0xffffffff);
-	ws->cur_tt = event.sched_type;
-	ws->cur_grp = event.queue_id;
 
 	if (event.sched_type != SSO_TT_EMPTY &&
 	    event.event_type == RTE_EVENT_TYPE_ETHDEV) {
@@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
 {
 	otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
 		     SSOW_LF_GWS_OP_SWTAG_UNTAG);
-	ws->cur_tt = SSO_SYNC_UNTAGGED;
 }
 
 static __rte_always_inline void
-otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
+otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
 {
-	if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
-		ws->cur_tt = SSO_SYNC_EMPTY;
+	if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
 		return;
-	}
-	otx2_write64(0, ws->swtag_flush_op);
-	ws->cur_tt = SSO_SYNC_EMPTY;
+	otx2_write64(0, flush_op);
 }
 
 static __rte_always_inline void
@@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
 }
 
 static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws)
+otx2_ssogws_head_wait(uint64_t tag_op)
 {
 #ifdef RTE_ARCH_ARM64
 	uint64_t tag;
@@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
 			"	tbz %[tag], 35, rty%=		\n"
 			"done%=:				\n"
 			: [tag] "=&r" (tag)
-			: [tag_op] "r" (ws->tag_op)
+			: [tag_op] "r" (tag_op)
 			);
 #else
 	/* Wait for the HEAD to be set */
-	while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
+	while (!(otx2_read64(tag_op) & BIT_ULL(35)))
 		;
 #endif
 }
@@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 }
 
 static __rte_always_inline uint16_t
-otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
-		     uint64_t *cmd,
+otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 		     const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
 		     const uint32_t flags)
 {
@@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 	if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
 	    (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
 		txq = otx2_ssogws_xtract_meta(m, txq_data);
-		return otx2_sec_event_tx(ws, ev, m, txq, flags);
+		return otx2_sec_event_tx(base, ev, m, txq, flags);
 	}
 
 	/* Perform header writes before barrier for TSO */
@@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 					     m->ol_flags, segdw, flags);
 		if (!ev->sched_type) {
 			otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
-			otx2_ssogws_head_wait(ws);
+			otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
 			if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
 				otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
 						       txq->io_addr, segdw);
@@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 
 		if (!ev->sched_type) {
 			otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
-			otx2_ssogws_head_wait(ws);
+			otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
 			if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
 				otx2_nix_xmit_one(cmd, txq->lmt_addr,
 						  txq->io_addr, flags);
@@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 			return 1;
 	}
 
-	otx2_ssogws_swtag_flush(ws);
+	otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
+				base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
 
 	return 1;
 }
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 946488eab..820455788 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -26,9 +26,9 @@ static __rte_always_inline void
 otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
 			   const struct rte_event *ev)
 {
+	const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
 	const uint32_t tag = (uint32_t)ev->event;
 	const uint8_t new_tt = ev->sched_type;
-	const uint8_t cur_tt = ws->cur_tt;
 
 	/* 96XX model
 	 * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -59,22 +59,20 @@ otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws,
 }
 
 static __rte_always_inline void
-otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
-			       struct otx2_ssogws_state *vws,
+otx2_ssogws_dual_forward_event(struct otx2_ssogws_state *vws,
 			       const struct rte_event *ev)
 {
 	const uint8_t grp = ev->queue_id;
 
 	/* Group hasn't changed, Use SWTAG to forward the event */
-	if (vws->cur_grp == grp) {
+	if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
 		otx2_ssogws_dual_fwd_swtag(vws, ev);
-		ws->swtag_req = 1;
 	} else {
-	/*
-	 * Group has been changed for group based work pipelining,
-	 * Use deschedule/add_work operation to transfer the event to
-	 * new group/core
-	 */
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
 		otx2_ssogws_dual_fwd_group(vws, ev, grp);
 	}
 }
@@ -90,10 +88,10 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
 		rte_smp_mb();
 		return otx2_ssogws_dual_new_event(ws, ev);
 	case RTE_EVENT_OP_FORWARD:
-		otx2_ssogws_dual_forward_event(ws, vws, ev);
+		otx2_ssogws_dual_forward_event(vws, ev);
 		break;
 	case RTE_EVENT_OP_RELEASE:
-		otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
+		otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
 		break;
 	default:
 		return 0;
@@ -135,7 +133,7 @@ otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
 	struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws];
 
 	RTE_SET_USED(nb_events);
-	otx2_ssogws_dual_forward_event(ws, vws, ev);
+	otx2_ssogws_dual_forward_event(vws, ev);
 
 	return 1;
 }
@@ -150,10 +148,10 @@ otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev,		\
 									\
 	rte_prefetch_non_temporal(ws);					\
 	RTE_SET_USED(timeout_ticks);					\
-	if (ws->swtag_req) {						\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(				\
+		ws->ws_state[!ws->vws].tag_op))) {			\
 		otx2_ssogws_swtag_wait((struct otx2_ssogws *)		\
 				       &ws->ws_state[!ws->vws]);	\
-		ws->swtag_req = 0;					\
 		return 1;						\
 	}								\
 									\
@@ -184,10 +182,10 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev,	\
 	uint64_t iter;							\
 	uint8_t gw;							\
 									\
-	if (ws->swtag_req) {						\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(				\
+		ws->ws_state[!ws->vws].tag_op))) {			\
 		otx2_ssogws_swtag_wait((struct otx2_ssogws *)		\
 				       &ws->ws_state[!ws->vws]);	\
-		ws->swtag_req = 0;					\
 		return 1;						\
 	}								\
 									\
@@ -228,10 +226,10 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev,	\
 	uint8_t gw;							\
 									\
 	RTE_SET_USED(timeout_ticks);					\
-	if (ws->swtag_req) {						\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(				\
+		ws->ws_state[!ws->vws].tag_op))) {			\
 		otx2_ssogws_swtag_wait((struct otx2_ssogws *)		\
 				       &ws->ws_state[!ws->vws]);	\
-		ws->swtag_req = 0;					\
 		return 1;						\
 	}								\
 									\
@@ -266,10 +264,10 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port,			\
 	uint64_t iter;							\
 	uint8_t gw;							\
 									\
-	if (ws->swtag_req) {						\
+	if (OTX2_SSOW_SWTAG_PEND(otx2_read64(				\
+		ws->ws_state[!ws->vws].tag_op))) {			\
 		otx2_ssogws_swtag_wait((struct otx2_ssogws *)		\
 				       &ws->ws_state[!ws->vws]);	\
-		ws->swtag_req = 0;					\
 		return 1;						\
 	}								\
 									\
@@ -314,15 +312,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 				       uint16_t nb_events)		\
 {									\
 	struct otx2_ssogws_dual *ws = port;				\
-	struct otx2_ssogws *vws =					\
-		(struct otx2_ssogws *)&ws->ws_state[!ws->vws];		\
 	uint64_t cmd[sz];						\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t	\
-				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
-				    ws->tx_adptr_data,			\
-				    flags);				\
+	return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],		\
+					  cmd, (const uint64_t		\
+					  (*)[RTE_MAX_QUEUES_PER_PORT])	\
+					  &ws->tx_adptr_data, flags);	\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -333,16 +329,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 					   struct rte_event ev[],	\
 					   uint16_t nb_events)		\
 {									\
-	struct otx2_ssogws_dual *ws = port;				\
-	struct otx2_ssogws *vws =					\
-		(struct otx2_ssogws *)&ws->ws_state[!ws->vws];		\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
+	struct otx2_ssogws_dual *ws = port;				\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t	\
-				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
-				    ws->tx_adptr_data,			\
-				    (flags) | NIX_TX_MULTI_SEG_F);	\
+	return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],		\
+					  cmd, (const uint64_t		\
+					  (*)[RTE_MAX_QUEUES_PER_PORT])	\
+					  &ws->tx_adptr_data,		\
+					  (flags) | NIX_TX_MULTI_SEG_F);\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h
index 6e6061821..72b616439 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.h
+++ b/drivers/event/octeontx2/otx2_worker_dual.h
@@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
 	event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
 		(event.get_work0 & (0x3FFull << 36)) << 4 |
 		(event.get_work0 & 0xffffffff);
-	ws->cur_tt = event.sched_type;
-	ws->cur_grp = event.queue_id;
 
 	if (event.sched_type != SSO_TT_EMPTY) {
 		if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
index 284bcd536..c8eae3d62 100644
--- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
+++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
@@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
 }
 
 static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws);
+otx2_ssogws_head_wait(uint64_t base);
 
 static __rte_always_inline int
-otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
-		  struct rte_mbuf *m, const struct otx2_eth_txq *txq,
-		  const uint32_t offload_flags)
+otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
+		  const struct otx2_eth_txq *txq, const uint32_t offload_flags)
 {
 	uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
 	struct otx2_sec_session_ipsec_ip *sess;
@@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 	__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
 
 	if (!ev->sched_type)
-		otx2_ssogws_head_wait(ws);
+		otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
 
 	inst.param1 = sess->esn_hi >> 16;
 	inst.param2 = sess->esn_hi & 0xffff;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [dpdk-dev] [PATCH v2] event/octeontx2: enhance Tx path cache locality
  2020-11-22 21:18 [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality pbhagavatula
@ 2021-01-12  8:39 ` pbhagavatula
  2021-01-26  9:57 ` [dpdk-dev] [PATCH] " Jerin Jacob
  1 sibling, 0 replies; 3+ messages in thread
From: pbhagavatula @ 2021-01-12  8:39 UTC (permalink / raw)
  To: jerinj, Pavan Nikhilesh, Anoob Joseph, Nithin Dabilpuram, Kiran Kumar K
  Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Enhance Tx path cache locality and remove the current tag type and group
stores from the datapath to conserve store buffers.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 - SWTAG can complete asynchronously, so swtag_req must still be maintained
 in the ws data structure.

 drivers/event/octeontx2/otx2_evdev.c       | 13 ++------
 drivers/event/octeontx2/otx2_evdev.h       | 23 +++++++------
 drivers/event/octeontx2/otx2_worker.c      | 28 ++++++----------
 drivers/event/octeontx2/otx2_worker.h      | 32 +++++++-----------
 drivers/event/octeontx2/otx2_worker_dual.c | 39 ++++++++++------------
 drivers/event/octeontx2/otx2_worker_dual.h |  2 --
 drivers/net/octeontx2/otx2_ethdev_sec_tx.h |  9 +++--
 7 files changed, 59 insertions(+), 87 deletions(-)

diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 0fe014c24..80a786f21 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
 		ws->port = i;
 		base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
 		sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
+		ws->base[0] = base;
 		vws++;

 		base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
 		sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
+		ws->base[1] = base;
 		vws++;

 		gws_cookie = ssogws_get_cookie(ws);
@@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
 		ws->port = i;
 		base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
 		sso_set_port_ops(ws, base);
+		ws->base = base;

 		gws_cookie = ssogws_get_cookie(ws);
 		gws_cookie->event_dev = event_dev;
@@ -1449,18 +1452,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
 			ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
 			ws->swtag_req = 0;
 			ws->vws = 0;
-			ws->ws_state[0].cur_grp = 0;
-			ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
-			ws->ws_state[1].cur_grp = 0;
-			ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
 		} else {
 			struct otx2_ssogws *ws;

 			ws = event_dev->data->ports[i];
 			ssogws_reset(ws);
 			ws->swtag_req = 0;
-			ws->cur_grp = 0;
-			ws->cur_tt = SSO_SYNC_EMPTY;
 		}
 	}

@@ -1479,8 +1476,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
 			otx2_write64(enable, ws->grps_base[i] +
 				     SSO_LF_GGRP_QCTL);
 		}
-		ws->ws_state[0].cur_grp = 0;
-		ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
 	} else {
 		struct otx2_ssogws *ws = event_dev->data->ports[0];

@@ -1492,8 +1487,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
 			otx2_write64(enable, ws->grps_base[i] +
 				     SSO_LF_GGRP_QCTL);
 		}
-		ws->cur_grp = 0;
-		ws->cur_tt = SSO_SYNC_EMPTY;
 	}

 	/* reset SSO GWS cache */
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 0513cb81c..ed9cbc86b 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -80,6 +80,7 @@

 #define OTX2_SSOW_GET_BASE_ADDR(_GW)        ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
 #define OTX2_SSOW_TT_FROM_TAG(x)	    (((x) >> 32) & SSO_TT_EMPTY)
+#define OTX2_SSOW_GRP_FROM_TAG(x)	    (((x) >> 36) & 0x3ff)

 #define NSEC2USEC(__ns)			((__ns) / 1E3)
 #define USEC2NSEC(__us)                 ((__us) * 1E3)
@@ -169,25 +170,24 @@ struct otx2_sso_evdev {
 	uintptr_t wqp_op;                                                      \
 	uintptr_t swtag_flush_op;                                              \
 	uintptr_t swtag_norm_op;                                               \
-	uintptr_t swtag_desched_op;                                            \
-	uint8_t cur_tt;                                                        \
-	uint8_t cur_grp
+	uintptr_t swtag_desched_op;

 /* Event port aka GWS */
 struct otx2_ssogws {
 	/* Get Work Fastpath data */
 	OTX2_SSOGWS_OPS;
-	uint8_t swtag_req;
+	/* PTP timestamp */
+	struct otx2_timesync_info *tstamp;
 	void *lookup_mem;
+	uint8_t swtag_req;
 	uint8_t port;
 	/* Add Work Fastpath data */
 	uint64_t xaq_lmt __rte_cache_aligned;
 	uint64_t *fc_mem;
 	uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
-	/* PTP timestamp */
-	struct otx2_timesync_info *tstamp;
 	/* Tx Fastpath data */
-	uint8_t tx_adptr_data[] __rte_cache_aligned;
+	uint64_t base __rte_cache_aligned;
+	uint8_t tx_adptr_data[];
 } __rte_cache_aligned;

 struct otx2_ssogws_state {
@@ -197,18 +197,19 @@ struct otx2_ssogws_state {
 struct otx2_ssogws_dual {
 	/* Get Work Fastpath data */
 	struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
+	/* PTP timestamp */
+	struct otx2_timesync_info *tstamp;
+	void *lookup_mem;
 	uint8_t swtag_req;
 	uint8_t vws; /* Ping pong bit */
-	void *lookup_mem;
 	uint8_t port;
 	/* Add Work Fastpath data */
 	uint64_t xaq_lmt __rte_cache_aligned;
 	uint64_t *fc_mem;
 	uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
-	/* PTP timestamp */
-	struct otx2_timesync_info *tstamp;
 	/* Tx Fastpath data */
-	uint8_t tx_adptr_data[] __rte_cache_aligned;
+	uint64_t base[2] __rte_cache_aligned;
+	uint8_t tx_adptr_data[];
 } __rte_cache_aligned;

 static inline struct otx2_sso_evdev *
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index b098407e0..95139d27a 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
 {
 	const uint32_t tag = (uint32_t)ev->event;
 	const uint8_t new_tt = ev->sched_type;
-	const uint8_t cur_tt = ws->cur_tt;
+	const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));

 	/* 96XX model
 	 * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -64,7 +64,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
 	const uint8_t grp = ev->queue_id;

 	/* Group hasn't changed, Use SWTAG to forward the event */
-	if (ws->cur_grp == grp)
+	if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
 		otx2_ssogws_fwd_swtag(ws, ev);
 	else
 	/*
@@ -75,12 +75,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
 		otx2_ssogws_fwd_group(ws, ev, grp);
 }

-static __rte_always_inline void
-otx2_ssogws_release_event(struct otx2_ssogws *ws)
-{
-	otx2_ssogws_swtag_flush(ws);
-}
-
 #define R(name, f6, f5, f4, f3, f2, f1, f0, flags)			\
 uint16_t __rte_hot								\
 otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev,		\
@@ -221,7 +215,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
 		otx2_ssogws_forward_event(ws, ev);
 		break;
 	case RTE_EVENT_OP_RELEASE:
-		otx2_ssogws_release_event(ws);
+		otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
 		break;
 	default:
 		return 0;
@@ -274,14 +268,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],	\
 {									\
 	struct otx2_ssogws *ws = port;					\
 	uint64_t cmd[sz];						\
-	int i;								\
 									\
-	for (i = 0; i < nb_events; i++)					\
-		otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t	\
+	RTE_SET_USED(nb_events);					\
+	return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,		\
+				    (const uint64_t			\
 				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
 				    &ws->tx_adptr_data,			\
 				    flags);				\
-	return nb_events;						\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -293,14 +286,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 {									\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
 	struct otx2_ssogws *ws = port;					\
-	int i;								\
 									\
-	for (i = 0; i < nb_events; i++)					\
-		otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t	\
+	RTE_SET_USED(nb_events);					\
+	return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,		\
+				    (const uint64_t			\
 				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
 				    &ws->tx_adptr_data,			\
 				    (flags) | NIX_TX_MULTI_SEG_F);	\
-	return nb_events;						\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -335,7 +327,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
 		if (fn != NULL && ev.u64 != 0)
 			fn(arg, ev);
 		if (ev.sched_type != SSO_TT_EMPTY)
-			otx2_ssogws_swtag_flush(ws);
+			otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
 		rte_mb();
 		aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
 		ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 0a7d6671c..2b716c042 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
 	event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
 		(event.get_work0 & (0x3FFull << 36)) << 4 |
 		(event.get_work0 & 0xffffffff);
-	ws->cur_tt = event.sched_type;
-	ws->cur_grp = event.queue_id;

 	if (event.sched_type != SSO_TT_EMPTY) {
 		if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
@@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
 	event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
 		(event.get_work0 & (0x3FFull << 36)) << 4 |
 		(event.get_work0 & 0xffffffff);
-	ws->cur_tt = event.sched_type;
-	ws->cur_grp = event.queue_id;

 	if (event.sched_type != SSO_TT_EMPTY &&
 	    event.event_type == RTE_EVENT_TYPE_ETHDEV) {
@@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
 {
 	otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
 		     SSOW_LF_GWS_OP_SWTAG_UNTAG);
-	ws->cur_tt = SSO_SYNC_UNTAGGED;
 }

 static __rte_always_inline void
-otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
+otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
 {
-	if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
-		ws->cur_tt = SSO_SYNC_EMPTY;
+	if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
 		return;
-	}
-	otx2_write64(0, ws->swtag_flush_op);
-	ws->cur_tt = SSO_SYNC_EMPTY;
+	otx2_write64(0, flush_op);
 }

 static __rte_always_inline void
@@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
 }

 static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws)
+otx2_ssogws_head_wait(uint64_t tag_op)
 {
 #ifdef RTE_ARCH_ARM64
 	uint64_t tag;
@@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
 			"	tbz %[tag], 35, rty%=		\n"
 			"done%=:				\n"
 			: [tag] "=&r" (tag)
-			: [tag_op] "r" (ws->tag_op)
+			: [tag_op] "r" (tag_op)
 			);
 #else
 	/* Wait for the HEAD to be set */
-	while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
+	while (!(otx2_read64(tag_op) & BIT_ULL(35)))
 		;
 #endif
 }
@@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 }

 static __rte_always_inline uint16_t
-otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
-		     uint64_t *cmd,
+otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 		     const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
 		     const uint32_t flags)
 {
@@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 	if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
 	    (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
 		txq = otx2_ssogws_xtract_meta(m, txq_data);
-		return otx2_sec_event_tx(ws, ev, m, txq, flags);
+		return otx2_sec_event_tx(base, ev, m, txq, flags);
 	}

 	/* Perform header writes before barrier for TSO */
@@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 					     m->ol_flags, segdw, flags);
 		if (!ev->sched_type) {
 			otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
-			otx2_ssogws_head_wait(ws);
+			otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
 			if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
 				otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
 						       txq->io_addr, segdw);
@@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,

 		if (!ev->sched_type) {
 			otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
-			otx2_ssogws_head_wait(ws);
+			otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
 			if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
 				otx2_nix_xmit_one(cmd, txq->lmt_addr,
 						  txq->io_addr, flags);
@@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 			return 1;
 	}

-	otx2_ssogws_swtag_flush(ws);
+	otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
+				base + SSOW_LF_GWS_OP_SWTAG_FLUSH);

 	return 1;
 }
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 946488eab..81af4ca90 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -26,9 +26,9 @@ static __rte_always_inline void
 otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
 			   const struct rte_event *ev)
 {
+	const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
 	const uint32_t tag = (uint32_t)ev->event;
 	const uint8_t new_tt = ev->sched_type;
-	const uint8_t cur_tt = ws->cur_tt;

 	/* 96XX model
 	 * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -66,15 +66,15 @@ otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
 	const uint8_t grp = ev->queue_id;

 	/* Group hasn't changed, Use SWTAG to forward the event */
-	if (vws->cur_grp == grp) {
+	if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
 		otx2_ssogws_dual_fwd_swtag(vws, ev);
 		ws->swtag_req = 1;
 	} else {
-	/*
-	 * Group has been changed for group based work pipelining,
-	 * Use deschedule/add_work operation to transfer the event to
-	 * new group/core
-	 */
+		/*
+		 * Group has been changed for group based work pipelining,
+		 * Use deschedule/add_work operation to transfer the event to
+		 * new group/core
+		 */
 		otx2_ssogws_dual_fwd_group(vws, ev, grp);
 	}
 }
@@ -93,7 +93,7 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
 		otx2_ssogws_dual_forward_event(ws, vws, ev);
 		break;
 	case RTE_EVENT_OP_RELEASE:
-		otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
+		otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
 		break;
 	default:
 		return 0;
@@ -314,15 +314,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,			\
 				       uint16_t nb_events)		\
 {									\
 	struct otx2_ssogws_dual *ws = port;				\
-	struct otx2_ssogws *vws =					\
-		(struct otx2_ssogws *)&ws->ws_state[!ws->vws];		\
 	uint64_t cmd[sz];						\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t	\
-				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
-				    ws->tx_adptr_data,			\
-				    flags);				\
+	return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],		\
+					  cmd, (const uint64_t		\
+					  (*)[RTE_MAX_QUEUES_PER_PORT])	\
+					  &ws->tx_adptr_data, flags);	\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -333,16 +331,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,			\
 					   struct rte_event ev[],	\
 					   uint16_t nb_events)		\
 {									\
-	struct otx2_ssogws_dual *ws = port;				\
-	struct otx2_ssogws *vws =					\
-		(struct otx2_ssogws *)&ws->ws_state[!ws->vws];		\
 	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
+	struct otx2_ssogws_dual *ws = port;				\
 									\
 	RTE_SET_USED(nb_events);					\
-	return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t	\
-				    (*)[RTE_MAX_QUEUES_PER_PORT])	\
-				    ws->tx_adptr_data,			\
-				    (flags) | NIX_TX_MULTI_SEG_F);	\
+	return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],		\
+					  cmd, (const uint64_t		\
+					  (*)[RTE_MAX_QUEUES_PER_PORT])	\
+					  &ws->tx_adptr_data,		\
+					  (flags) | NIX_TX_MULTI_SEG_F);\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h
index 6e6061821..72b616439 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.h
+++ b/drivers/event/octeontx2/otx2_worker_dual.h
@@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
 	event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
 		(event.get_work0 & (0x3FFull << 36)) << 4 |
 		(event.get_work0 & 0xffffffff);
-	ws->cur_tt = event.sched_type;
-	ws->cur_grp = event.queue_id;

 	if (event.sched_type != SSO_TT_EMPTY) {
 		if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
index 284bcd536..c8eae3d62 100644
--- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
+++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
@@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
 }

 static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws);
+otx2_ssogws_head_wait(uint64_t base);

 static __rte_always_inline int
-otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
-		  struct rte_mbuf *m, const struct otx2_eth_txq *txq,
-		  const uint32_t offload_flags)
+otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
+		  const struct otx2_eth_txq *txq, const uint32_t offload_flags)
 {
 	uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
 	struct otx2_sec_session_ipsec_ip *sess;
@@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 	__mempool_check_cookies(m->pool, (void **)&m, 1, 0);

 	if (!ev->sched_type)
-		otx2_ssogws_head_wait(ws);
+		otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);

 	inst.param1 = sess->esn_hi >> 16;
 	inst.param2 = sess->esn_hi & 0xffff;
--
2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality
  2020-11-22 21:18 [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality pbhagavatula
  2021-01-12  8:39 ` [dpdk-dev] [PATCH v2] " pbhagavatula
@ 2021-01-26  9:57 ` Jerin Jacob
  1 sibling, 0 replies; 3+ messages in thread
From: Jerin Jacob @ 2021-01-26  9:57 UTC (permalink / raw)
  To: Pavan Nikhilesh
  Cc: Jerin Jacob, Anoob Joseph, Nithin Dabilpuram, Kiran Kumar K, dpdk-dev

On Mon, Nov 23, 2020 at 2:48 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Enhance Tx path cache locality, remove current tag type and group
> stores from datapath to conserve store buffers.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>



Applied to dpdk-next-net-eventdev/for-main. Thanks



>  drivers/event/octeontx2/otx2_evdev.c       | 15 ++----
>  drivers/event/octeontx2/otx2_evdev.h       | 24 ++++-----
>  drivers/event/octeontx2/otx2_worker.c      | 42 +++++----------
>  drivers/event/octeontx2/otx2_worker.h      | 32 +++++------
>  drivers/event/octeontx2/otx2_worker_dual.c | 63 ++++++++++------------
>  drivers/event/octeontx2/otx2_worker_dual.h |  2 -
>  drivers/net/octeontx2/otx2_ethdev_sec_tx.h |  9 ++--
>  7 files changed, 74 insertions(+), 113 deletions(-)
>
> diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
> index 0fe014c24..14f16a68f 100644
> --- a/drivers/event/octeontx2/otx2_evdev.c
> +++ b/drivers/event/octeontx2/otx2_evdev.c
> @@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
>                 ws->port = i;
>                 base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
>                 sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
> +               ws->base[0] = base;
>                 vws++;
>
>                 base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
>                 sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
> +               ws->base[1] = base;
>                 vws++;
>
>                 gws_cookie = ssogws_get_cookie(ws);
> @@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
>                 ws->port = i;
>                 base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
>                 sso_set_port_ops(ws, base);
> +               ws->base = base;
>
>                 gws_cookie = ssogws_get_cookie(ws);
>                 gws_cookie->event_dev = event_dev;
> @@ -1447,20 +1450,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
>                         ws = event_dev->data->ports[i];
>                         ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]);
>                         ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
> -                       ws->swtag_req = 0;
>                         ws->vws = 0;
> -                       ws->ws_state[0].cur_grp = 0;
> -                       ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
> -                       ws->ws_state[1].cur_grp = 0;
> -                       ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
>                 } else {
>                         struct otx2_ssogws *ws;
>
>                         ws = event_dev->data->ports[i];
>                         ssogws_reset(ws);
> -                       ws->swtag_req = 0;
> -                       ws->cur_grp = 0;
> -                       ws->cur_tt = SSO_SYNC_EMPTY;
>                 }
>         }
>
> @@ -1479,8 +1474,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
>                         otx2_write64(enable, ws->grps_base[i] +
>                                      SSO_LF_GGRP_QCTL);
>                 }
> -               ws->ws_state[0].cur_grp = 0;
> -               ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
>         } else {
>                 struct otx2_ssogws *ws = event_dev->data->ports[0];
>
> @@ -1492,8 +1485,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
>                         otx2_write64(enable, ws->grps_base[i] +
>                                      SSO_LF_GGRP_QCTL);
>                 }
> -               ws->cur_grp = 0;
> -               ws->cur_tt = SSO_SYNC_EMPTY;
>         }
>
>         /* reset SSO GWS cache */
> diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
> index 0513cb81c..e381b9e52 100644
> --- a/drivers/event/octeontx2/otx2_evdev.h
> +++ b/drivers/event/octeontx2/otx2_evdev.h
> @@ -80,6 +80,8 @@
>
>  #define OTX2_SSOW_GET_BASE_ADDR(_GW)        ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
>  #define OTX2_SSOW_TT_FROM_TAG(x)           (((x) >> 32) & SSO_TT_EMPTY)
> +#define OTX2_SSOW_GRP_FROM_TAG(x)          (((x) >> 36) & 0x3ff)
> +#define OTX2_SSOW_SWTAG_PEND(x)                    ((x) & BIT_ULL(62))
>
>  #define NSEC2USEC(__ns)                        ((__ns) / 1E3)
>  #define USEC2NSEC(__us)                 ((__us) * 1E3)
> @@ -169,25 +171,23 @@ struct otx2_sso_evdev {
>         uintptr_t wqp_op;                                                      \
>         uintptr_t swtag_flush_op;                                              \
>         uintptr_t swtag_norm_op;                                               \
> -       uintptr_t swtag_desched_op;                                            \
> -       uint8_t cur_tt;                                                        \
> -       uint8_t cur_grp
> +       uintptr_t swtag_desched_op;
>
>  /* Event port aka GWS */
>  struct otx2_ssogws {
>         /* Get Work Fastpath data */
>         OTX2_SSOGWS_OPS;
> -       uint8_t swtag_req;
> +       /* PTP timestamp */
> +       struct otx2_timesync_info *tstamp;
>         void *lookup_mem;
>         uint8_t port;
>         /* Add Work Fastpath data */
>         uint64_t xaq_lmt __rte_cache_aligned;
>         uint64_t *fc_mem;
>         uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
> -       /* PTP timestamp */
> -       struct otx2_timesync_info *tstamp;
>         /* Tx Fastpath data */
> -       uint8_t tx_adptr_data[] __rte_cache_aligned;
> +       uint64_t base __rte_cache_aligned;
> +       uint8_t tx_adptr_data[];
>  } __rte_cache_aligned;
>
>  struct otx2_ssogws_state {
> @@ -197,18 +197,18 @@ struct otx2_ssogws_state {
>  struct otx2_ssogws_dual {
>         /* Get Work Fastpath data */
>         struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
> -       uint8_t swtag_req;
> -       uint8_t vws; /* Ping pong bit */
> +       /* PTP timestamp */
> +       struct otx2_timesync_info *tstamp;
>         void *lookup_mem;
> +       uint8_t vws; /* Ping pong bit */
>         uint8_t port;
>         /* Add Work Fastpath data */
>         uint64_t xaq_lmt __rte_cache_aligned;
>         uint64_t *fc_mem;
>         uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
> -       /* PTP timestamp */
> -       struct otx2_timesync_info *tstamp;
>         /* Tx Fastpath data */
> -       uint8_t tx_adptr_data[] __rte_cache_aligned;
> +       uint64_t base[2] __rte_cache_aligned;
> +       uint8_t tx_adptr_data[];
>  } __rte_cache_aligned;
>
>  static inline struct otx2_sso_evdev *
> diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
> index b098407e0..7ed836c1e 100644
> --- a/drivers/event/octeontx2/otx2_worker.c
> +++ b/drivers/event/octeontx2/otx2_worker.c
> @@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
>  {
>         const uint32_t tag = (uint32_t)ev->event;
>         const uint8_t new_tt = ev->sched_type;
> -       const uint8_t cur_tt = ws->cur_tt;
> +       const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
>
>         /* 96XX model
>          * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
> @@ -41,8 +41,6 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
>         } else {
>                 otx2_ssogws_swtag_norm(ws, tag, new_tt);
>         }
> -
> -       ws->swtag_req = 1;
>  }
>
>  static __rte_always_inline void
> @@ -64,7 +62,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
>         const uint8_t grp = ev->queue_id;
>
>         /* Group hasn't changed, Use SWTAG to forward the event */
> -       if (ws->cur_grp == grp)
> +       if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
>                 otx2_ssogws_fwd_swtag(ws, ev);
>         else
>         /*
> @@ -75,12 +73,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
>                 otx2_ssogws_fwd_group(ws, ev, grp);
>  }
>
> -static __rte_always_inline void
> -otx2_ssogws_release_event(struct otx2_ssogws *ws)
> -{
> -       otx2_ssogws_swtag_flush(ws);
> -}
> -
>  #define R(name, f6, f5, f4, f3, f2, f1, f0, flags)                     \
>  uint16_t __rte_hot                                                             \
>  otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev,              \
> @@ -90,8 +82,7 @@ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev,             \
>                                                                         \
>         RTE_SET_USED(timeout_ticks);                                    \
>                                                                         \
> -       if (ws->swtag_req) {                                            \
> -               ws->swtag_req = 0;                                      \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {            \
>                 otx2_ssogws_swtag_wait(ws);                             \
>                 return 1;                                               \
>         }                                                               \
> @@ -117,8 +108,7 @@ otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev,   \
>         uint16_t ret = 1;                                               \
>         uint64_t iter;                                                  \
>                                                                         \
> -       if (ws->swtag_req) {                                            \
> -               ws->swtag_req = 0;                                      \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {            \
>                 otx2_ssogws_swtag_wait(ws);                             \
>                 return ret;                                             \
>         }                                                               \
> @@ -149,8 +139,7 @@ otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev,               \
>                                                                         \
>         RTE_SET_USED(timeout_ticks);                                    \
>                                                                         \
> -       if (ws->swtag_req) {                                            \
> -               ws->swtag_req = 0;                                      \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {            \
>                 otx2_ssogws_swtag_wait(ws);                             \
>                 return 1;                                               \
>         }                                                               \
> @@ -177,8 +166,7 @@ otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev,       \
>         uint16_t ret = 1;                                               \
>         uint64_t iter;                                                  \
>                                                                         \
> -       if (ws->swtag_req) {                                            \
> -               ws->swtag_req = 0;                                      \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) {            \
>                 otx2_ssogws_swtag_wait(ws);                             \
>                 return ret;                                             \
>         }                                                               \
> @@ -221,7 +209,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
>                 otx2_ssogws_forward_event(ws, ev);
>                 break;
>         case RTE_EVENT_OP_RELEASE:
> -               otx2_ssogws_release_event(ws);
> +               otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
>                 break;
>         default:
>                 return 0;
> @@ -274,14 +262,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],      \
>  {                                                                      \
>         struct otx2_ssogws *ws = port;                                  \
>         uint64_t cmd[sz];                                               \
> -       int i;                                                          \
>                                                                         \
> -       for (i = 0; i < nb_events; i++)                                 \
> -               otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t   \
> +       RTE_SET_USED(nb_events);                                        \
> +       return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,              \
> +                                   (const uint64_t                     \
>                                     (*)[RTE_MAX_QUEUES_PER_PORT])       \
>                                     &ws->tx_adptr_data,                 \
>                                     flags);                             \
> -       return nb_events;                                               \
>  }
>  SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
>  #undef T
> @@ -293,14 +280,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
>  {                                                                      \
>         uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
>         struct otx2_ssogws *ws = port;                                  \
> -       int i;                                                          \
>                                                                         \
> -       for (i = 0; i < nb_events; i++)                                 \
> -               otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t   \
> +       RTE_SET_USED(nb_events);                                        \
> +       return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,              \
> +                                   (const uint64_t                     \
>                                     (*)[RTE_MAX_QUEUES_PER_PORT])       \
>                                     &ws->tx_adptr_data,                 \
>                                     (flags) | NIX_TX_MULTI_SEG_F);      \
> -       return nb_events;                                               \
>  }
>  SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
>  #undef T
> @@ -335,7 +321,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
>                 if (fn != NULL && ev.u64 != 0)
>                         fn(arg, ev);
>                 if (ev.sched_type != SSO_TT_EMPTY)
> -                       otx2_ssogws_swtag_flush(ws);
> +                       otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
>                 rte_mb();
>                 aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
>                 ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
> diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
> index 0a7d6671c..2b716c042 100644
> --- a/drivers/event/octeontx2/otx2_worker.h
> +++ b/drivers/event/octeontx2/otx2_worker.h
> @@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
>         event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
>                 (event.get_work0 & (0x3FFull << 36)) << 4 |
>                 (event.get_work0 & 0xffffffff);
> -       ws->cur_tt = event.sched_type;
> -       ws->cur_grp = event.queue_id;
>
>         if (event.sched_type != SSO_TT_EMPTY) {
>                 if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
> @@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
>         event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
>                 (event.get_work0 & (0x3FFull << 36)) << 4 |
>                 (event.get_work0 & 0xffffffff);
> -       ws->cur_tt = event.sched_type;
> -       ws->cur_grp = event.queue_id;
>
>         if (event.sched_type != SSO_TT_EMPTY &&
>             event.event_type == RTE_EVENT_TYPE_ETHDEV) {
> @@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
>  {
>         otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
>                      SSOW_LF_GWS_OP_SWTAG_UNTAG);
> -       ws->cur_tt = SSO_SYNC_UNTAGGED;
>  }
>
>  static __rte_always_inline void
> -otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
> +otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
>  {
> -       if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
> -               ws->cur_tt = SSO_SYNC_EMPTY;
> +       if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
>                 return;
> -       }
> -       otx2_write64(0, ws->swtag_flush_op);
> -       ws->cur_tt = SSO_SYNC_EMPTY;
> +       otx2_write64(0, flush_op);
>  }
>
>  static __rte_always_inline void
> @@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
>  }
>
>  static __rte_always_inline void
> -otx2_ssogws_head_wait(struct otx2_ssogws *ws)
> +otx2_ssogws_head_wait(uint64_t tag_op)
>  {
>  #ifdef RTE_ARCH_ARM64
>         uint64_t tag;
> @@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
>                         "       tbz %[tag], 35, rty%=           \n"
>                         "done%=:                                \n"
>                         : [tag] "=&r" (tag)
> -                       : [tag_op] "r" (ws->tag_op)
> +                       : [tag_op] "r" (tag_op)
>                         );
>  #else
>         /* Wait for the HEAD to be set */
> -       while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
> +       while (!(otx2_read64(tag_op) & BIT_ULL(35)))
>                 ;
>  #endif
>  }
> @@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
>  }
>
>  static __rte_always_inline uint16_t
> -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> -                    uint64_t *cmd,
> +otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
>                      const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
>                      const uint32_t flags)
>  {
> @@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
>         if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
>             (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
>                 txq = otx2_ssogws_xtract_meta(m, txq_data);
> -               return otx2_sec_event_tx(ws, ev, m, txq, flags);
> +               return otx2_sec_event_tx(base, ev, m, txq, flags);
>         }
>
>         /* Perform header writes before barrier for TSO */
> @@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
>                                              m->ol_flags, segdw, flags);
>                 if (!ev->sched_type) {
>                         otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> -                       otx2_ssogws_head_wait(ws);
> +                       otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
>                         if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
>                                 otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
>                                                        txq->io_addr, segdw);
> @@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
>
>                 if (!ev->sched_type) {
>                         otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> -                       otx2_ssogws_head_wait(ws);
> +                       otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
>                         if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
>                                 otx2_nix_xmit_one(cmd, txq->lmt_addr,
>                                                   txq->io_addr, flags);
> @@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
>                         return 1;
>         }
>
> -       otx2_ssogws_swtag_flush(ws);
> +       otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
> +                               base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
>
>         return 1;
>  }
> diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
> index 946488eab..820455788 100644
> --- a/drivers/event/octeontx2/otx2_worker_dual.c
> +++ b/drivers/event/octeontx2/otx2_worker_dual.c
> @@ -26,9 +26,9 @@ static __rte_always_inline void
>  otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
>                            const struct rte_event *ev)
>  {
> +       const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
>         const uint32_t tag = (uint32_t)ev->event;
>         const uint8_t new_tt = ev->sched_type;
> -       const uint8_t cur_tt = ws->cur_tt;
>
>         /* 96XX model
>          * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
> @@ -59,22 +59,20 @@ otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws,
>  }
>
>  static __rte_always_inline void
> -otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
> -                              struct otx2_ssogws_state *vws,
> +otx2_ssogws_dual_forward_event(struct otx2_ssogws_state *vws,
>                                const struct rte_event *ev)
>  {
>         const uint8_t grp = ev->queue_id;
>
>         /* Group hasn't changed, Use SWTAG to forward the event */
> -       if (vws->cur_grp == grp) {
> +       if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
>                 otx2_ssogws_dual_fwd_swtag(vws, ev);
> -               ws->swtag_req = 1;
>         } else {
> -       /*
> -        * Group has been changed for group based work pipelining,
> -        * Use deschedule/add_work operation to transfer the event to
> -        * new group/core
> -        */
> +               /*
> +                * Group has been changed for group based work pipelining,
> +                * Use deschedule/add_work operation to transfer the event to
> +                * new group/core
> +                */
>                 otx2_ssogws_dual_fwd_group(vws, ev, grp);
>         }
>  }
> @@ -90,10 +88,10 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
>                 rte_smp_mb();
>                 return otx2_ssogws_dual_new_event(ws, ev);
>         case RTE_EVENT_OP_FORWARD:
> -               otx2_ssogws_dual_forward_event(ws, vws, ev);
> +               otx2_ssogws_dual_forward_event(vws, ev);
>                 break;
>         case RTE_EVENT_OP_RELEASE:
> -               otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
> +               otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
>                 break;
>         default:
>                 return 0;
> @@ -135,7 +133,7 @@ otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
>         struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws];
>
>         RTE_SET_USED(nb_events);
> -       otx2_ssogws_dual_forward_event(ws, vws, ev);
> +       otx2_ssogws_dual_forward_event(vws, ev);
>
>         return 1;
>  }
> @@ -150,10 +148,10 @@ otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev,            \
>                                                                         \
>         rte_prefetch_non_temporal(ws);                                  \
>         RTE_SET_USED(timeout_ticks);                                    \
> -       if (ws->swtag_req) {                                            \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(                           \
> +               ws->ws_state[!ws->vws].tag_op))) {                      \
>                 otx2_ssogws_swtag_wait((struct otx2_ssogws *)           \
>                                        &ws->ws_state[!ws->vws]);        \
> -               ws->swtag_req = 0;                                      \
>                 return 1;                                               \
>         }                                                               \
>                                                                         \
> @@ -184,10 +182,10 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev,    \
>         uint64_t iter;                                                  \
>         uint8_t gw;                                                     \
>                                                                         \
> -       if (ws->swtag_req) {                                            \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(                           \
> +               ws->ws_state[!ws->vws].tag_op))) {                      \
>                 otx2_ssogws_swtag_wait((struct otx2_ssogws *)           \
>                                        &ws->ws_state[!ws->vws]);        \
> -               ws->swtag_req = 0;                                      \
>                 return 1;                                               \
>         }                                                               \
>                                                                         \
> @@ -228,10 +226,10 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev,        \
>         uint8_t gw;                                                     \
>                                                                         \
>         RTE_SET_USED(timeout_ticks);                                    \
> -       if (ws->swtag_req) {                                            \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(                           \
> +               ws->ws_state[!ws->vws].tag_op))) {                      \
>                 otx2_ssogws_swtag_wait((struct otx2_ssogws *)           \
>                                        &ws->ws_state[!ws->vws]);        \
> -               ws->swtag_req = 0;                                      \
>                 return 1;                                               \
>         }                                                               \
>                                                                         \
> @@ -266,10 +264,10 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port,                      \
>         uint64_t iter;                                                  \
>         uint8_t gw;                                                     \
>                                                                         \
> -       if (ws->swtag_req) {                                            \
> +       if (OTX2_SSOW_SWTAG_PEND(otx2_read64(                           \
> +               ws->ws_state[!ws->vws].tag_op))) {                      \
>                 otx2_ssogws_swtag_wait((struct otx2_ssogws *)           \
>                                        &ws->ws_state[!ws->vws]);        \
> -               ws->swtag_req = 0;                                      \
>                 return 1;                                               \
>         }                                                               \
>                                                                         \
> @@ -314,15 +312,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,                        \
>                                        uint16_t nb_events)              \
>  {                                                                      \
>         struct otx2_ssogws_dual *ws = port;                             \
> -       struct otx2_ssogws *vws =                                       \
> -               (struct otx2_ssogws *)&ws->ws_state[!ws->vws];          \
>         uint64_t cmd[sz];                                               \
>                                                                         \
>         RTE_SET_USED(nb_events);                                        \
> -       return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t       \
> -                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
> -                                   ws->tx_adptr_data,                  \
> -                                   flags);                             \
> +       return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],         \
> +                                         cmd, (const uint64_t          \
> +                                         (*)[RTE_MAX_QUEUES_PER_PORT]) \
> +                                         &ws->tx_adptr_data, flags);   \
>  }
>  SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
>  #undef T
> @@ -333,16 +329,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,                    \
>                                            struct rte_event ev[],       \
>                                            uint16_t nb_events)          \
>  {                                                                      \
> -       struct otx2_ssogws_dual *ws = port;                             \
> -       struct otx2_ssogws *vws =                                       \
> -               (struct otx2_ssogws *)&ws->ws_state[!ws->vws];          \
>         uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
> +       struct otx2_ssogws_dual *ws = port;                             \
>                                                                         \
>         RTE_SET_USED(nb_events);                                        \
> -       return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t       \
> -                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
> -                                   ws->tx_adptr_data,                  \
> -                                   (flags) | NIX_TX_MULTI_SEG_F);      \
> +       return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],         \
> +                                         cmd, (const uint64_t          \
> +                                         (*)[RTE_MAX_QUEUES_PER_PORT]) \
> +                                         &ws->tx_adptr_data,           \
> +                                         (flags) | NIX_TX_MULTI_SEG_F);\
>  }
>  SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
>  #undef T
> diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h
> index 6e6061821..72b616439 100644
> --- a/drivers/event/octeontx2/otx2_worker_dual.h
> +++ b/drivers/event/octeontx2/otx2_worker_dual.h
> @@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
>         event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
>                 (event.get_work0 & (0x3FFull << 36)) << 4 |
>                 (event.get_work0 & 0xffffffff);
> -       ws->cur_tt = event.sched_type;
> -       ws->cur_grp = event.queue_id;
>
>         if (event.sched_type != SSO_TT_EMPTY) {
>                 if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
> diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
> index 284bcd536..c8eae3d62 100644
> --- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
> +++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
> @@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
>  }
>
>  static __rte_always_inline void
> -otx2_ssogws_head_wait(struct otx2_ssogws *ws);
> +otx2_ssogws_head_wait(uint64_t base);
>
>  static __rte_always_inline int
> -otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> -                 struct rte_mbuf *m, const struct otx2_eth_txq *txq,
> -                 const uint32_t offload_flags)
> +otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
> +                 const struct otx2_eth_txq *txq, const uint32_t offload_flags)
>  {
>         uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
>         struct otx2_sec_session_ipsec_ip *sess;
> @@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
>         __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
>
>         if (!ev->sched_type)
> -               otx2_ssogws_head_wait(ws);
> +               otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
>
>         inst.param1 = sess->esn_hi >> 16;
>         inst.param2 = sess->esn_hi & 0xffff;
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-01-26  9:58 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-22 21:18 [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality pbhagavatula
2021-01-12  8:39 ` [dpdk-dev] [PATCH v2] " pbhagavatula
2021-01-26  9:57 ` [dpdk-dev] [PATCH] " Jerin Jacob

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).