From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id E0020A0545;
	Tue, 11 Oct 2022 11:08:42 +0200 (CEST)
Received: from [217.70.189.124] (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id 346EF42D15;
	Tue, 11 Oct 2022 11:08:27 +0200 (CEST)
Received: from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com
 [67.231.148.174])
 by mails.dpdk.org (Postfix) with ESMTP id AB83542D02
 for <dev@dpdk.org>; Tue, 11 Oct 2022 11:08:24 +0200 (CEST)
Received: from pps.filterd (m0045849.ppops.net [127.0.0.1])
 by mx0a-0016f401.pphosted.com (8.17.1.5/8.17.1.5) with ESMTP id 29B8OGGd030115;
 Tue, 11 Oct 2022 02:08:20 -0700
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;
 h=from : to : cc :
 subject : date : message-id : in-reply-to : references : mime-version :
 content-transfer-encoding : content-type; s=pfpt0220;
 bh=sT35oXdP8jx+Rf+aWvUhlGUCgANikZEIRwdSZoGkEss=;
 b=ROucJwAHtlVBXuN6rCedakwGYwKF4zoFVMuWZm5kyYNRA6r+urBJytTYBL781xcpBWf+
 oYVmkjplmevYhurFc7YAwZySaSFdEseIEFxqEM0Nf/vDUAeQ0tUQ2ZSc6FdSS+O89AYp
 10QWcXYKjyVli46i1TUvXiR5x+BNFaV7OxVJyb0sVbJ78CPp5kH7cR/wbH4ThlMbjW3e
 7UqQCRv6UcUbtEmwW5H6+NWhCPx119xsId0KHrQyeSzZvnmnpteQ6XbQILgjdpMiRVhu
 ooPLGM3thoqKoxOO1NV9ty4Aazl43j6PE0kWNaWCu72ymbP4TaJ4gZsjaSQAyX9p9dl6 4A== 
Received: from dc5-exch01.marvell.com ([199.233.59.181])
 by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3k54xug5nq-1
 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT);
 Tue, 11 Oct 2022 02:08:20 -0700
Received: from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH01.marvell.com
 (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.2;
 Tue, 11 Oct 2022 02:08:18 -0700
Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com
 (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.2 via Frontend
 Transport; Tue, 11 Oct 2022 02:08:18 -0700
Received: from MININT-80QBFE8.corp.innovium.com (unknown [10.28.161.88])
 by maili.marvell.com (Postfix) with ESMTP id D248A3F7088;
 Tue, 11 Oct 2022 02:08:14 -0700 (PDT)
From: <pbhagavatula@marvell.com>
To: <jerinj@marvell.com>, David Christensen <drc@linux.vnet.ibm.com>, "Ruifeng
 Wang" <ruifeng.wang@arm.com>,
 Bruce Richardson <bruce.richardson@intel.com>,
 Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
CC: <dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>
Subject: [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector
Date: Tue, 11 Oct 2022 14:38:03 +0530
Message-ID: <20221011090805.3602-3-pbhagavatula@marvell.com>
X-Mailer: git-send-email 2.25.1
In-Reply-To: <20221011090805.3602-1-pbhagavatula@marvell.com>
References: <20220911181250.2286-1-pbhagavatula@marvell.com>
 <20221011090805.3602-1-pbhagavatula@marvell.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
X-Proofpoint-GUID: 9TazKmPXdHffQFXItIgHE5cVAB8If4rP
X-Proofpoint-ORIG-GUID: 9TazKmPXdHffQFXItIgHE5cVAB8If4rP
X-Proofpoint-Virus-Version: vendor=baseguard
 engine=ICAP:2.0.205,Aquarius:18.0.895,Hydra:6.0.528,FMLib:17.11.122.1
 definitions=2022-10-11_03,2022-10-10_02,2022-06-22_01
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use the LPM vector path to process event vectors.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
 examples/l3fwd/l3fwd_event.h   | 71 ++++++++++++++++++++++++++++++++++
 examples/l3fwd/l3fwd_lpm.c     | 39 +++++++++++--------
 examples/l3fwd/l3fwd_neon.h    | 47 ++++++++++++++++++++++
 examples/l3fwd/l3fwd_sse.h     | 44 +++++++++++++++++++++
 5 files changed, 214 insertions(+), 16 deletions(-)

diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }
 
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+	/* Return dst_ports[0] if all nb_elem entries equal it, else BAD_PORT. */
+	while (nb_elem > 7) { /* vector part: 8 entries per iteration */
+		__vector unsigned short dp1;
+		__vector unsigned short dp;
+
+		dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+		dp1 = *((__vector unsigned short *)&dst_ports[i]);
+		res = vec_all_eq(dp1, dp); /* non-zero only if all 8 lanes match */
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem) { /* scalar tail: fewer than 8 entries left */
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
 	uint64_t vector_tmo_ns;
 };
 
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	int i;
+	/* Scalar version: dst_ports[0] if all entries equal it, else BAD_PORT */
+	for (i = 0; i < nb_elem; i++) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+	}
+
+	return dst_ports[0];
+}
+#endif
+
 static inline void
 event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
 {
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
 	}
 }
 
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+		   uint16_t nb_pkts)
+{
+	uint16_t *des_pos, free = 0;
+	struct rte_mbuf **pos;
+	int i;
+	/* Free every mbuf whose dst_port is BAD_PORT and compact mbufs[] and
+	 * dst_port[] in place over the gaps; returns the number kept. */
+	for (i = 0; i < nb_pkts; i++) {
+		if (dst_port[i] == BAD_PORT) {
+			rte_pktmbuf_free(mbufs[i]);
+			if (!free) { /* first hole: start both write cursors */
+				pos = &mbufs[i];
+				des_pos = &dst_port[i];
+			}
+			free++;
+			continue;
+		}
+		/* Good packet: move it down only if a hole exists before it. */
+		if (free) { /* pos/des_pos were set when free became non-zero */
+			*pos = mbufs[i];
+			pos++;
+			*des_pos = dst_port[i];
+			des_pos++;
+		}
+	}
 
+	return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+	uint16_t port, i;
+	/* Drop bad packets, then set port/queue vector-wide or per mbuf. */
+	vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+	/* Check whether every remaining packet has the same dst_port */
+	port = process_dst_port(dst_port, vec->nb_elem);
+	if (port == BAD_PORT) { /* no common port: mark each mbuf itself */
+		vec->attr_valid = 0;
+		for (i = 0; i < vec->nb_elem; i++) {
+			vec->mbufs[i]->port = dst_port[i];
+			rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+		}
+	} else { /* common port: record it once in the vector attributes */
+		vec->attr_valid = 1;
+		vec->port = port;
+		vec->queue = 0;
+	}
+}
 
 struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
 void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 22d7f61a42..5172979c72 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
 }
 
 static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+			 uint16_t *dst_port)
 {
 	struct rte_mbuf **mbufs = vec->mbufs;
 	int i;
 
-	/* Process first packet to init vector attributes */
-	lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
 	if (vec->attr_valid) {
-		if (mbufs[0]->port != BAD_PORT)
-			vec->port = mbufs[0]->port;
-		else
-			vec->attr_valid = 0;
+		l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+					  dst_port, lconf, 1);
+	} else { /* attr_valid clear: use each mbuf's own port field */
+		for (i = 0; i < vec->nb_elem; i++)
+			l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+						  &dst_port[i], lconf, 1);
 	}
+#else /* other builds: one lpm_process_event_pkt() call per mbuf */
+	for (i = 0; i < vec->nb_elem; i++)
+		dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
 
-	for (i = 1; i < vec->nb_elem; i++) {
-		lpm_process_event_pkt(lconf, mbufs[i]);
-		event_vector_attr_validate(vec, mbufs[i]);
-	}
+	process_event_vector(vec, dst_port);
 }
 
 /* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 	struct rte_event events[MAX_PKT_BURST];
 	int i, nb_enq = 0, nb_deq = 0;
 	struct lcore_conf *lconf;
+	uint16_t *dst_port_list;
 	unsigned int lcore_id;
 
 	if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 
 	lcore_id = rte_lcore_id();
 	lconf = &lcore_conf[lcore_id];
-
+	dst_port_list =
+		rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+			    RTE_CACHE_LINE_SIZE);
+	if (dst_port_list == NULL)
+		return;
 	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
 
 	while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 				events[i].op = RTE_EVENT_OP_FORWARD;
 			}
 
-			lpm_process_event_vector(events[i].vec, lconf);
-
-			if (flags & L3FWD_EVENT_TX_DIRECT)
-				event_vector_txq_set(events[i].vec, 0);
+			lpm_process_event_vector(events[i].vec, lconf,
+						 dst_port_list);
 		}
 
 		if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 
 	l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
 				   nb_deq, 1);
+	rte_free(dst_port_list);
 }
 
 int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..bf365341fb 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }
 
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0;
+	/* Return dst_ports[0] if all nb_elem entries equal it, else BAD_PORT. */
+#if defined(RTE_ARCH_ARM64)
+	uint16_t res;
+
+	while (nb_elem > 7) { /* 8 entries per iteration */
+		uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+		uint16x8_t dp1;
+		/* Equal lanes become 0xFFFF; a zero minimum means a mismatch. */
+		dp1 = vld1q_u16(&dst_ports[i]);
+		dp1 = vceqq_u16(dp1, dp);
+		res = vminvq_u16(dp1);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem > 3) { /* 4 entries per iteration */
+		uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+		uint16x4_t dp1;
+		/* Same check as above on a 4-lane vector. */
+		dp1 = vld1_u16(&dst_ports[i]);
+		dp1 = vceq_u16(dp1, dp);
+		res = vminv_u16(dp1);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 4;
+		i += 4;
+	}
+#endif
+	/* Scalar tail; handles every entry when the block above is compiled out */
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }
 
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+	/* Return dst_ports[0] if all nb_elem entries equal it, else BAD_PORT. */
+	while (nb_elem > 7) { /* 8 entries per iteration */
+		__m128i dp = _mm_set1_epi16(dst_ports[0]);
+		__m128i dp1;
+		/* Equal lanes become 0xFFFF, so all-equal gives a 16-bit mask. */
+		dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+		dp1 = _mm_cmpeq_epi16(dp1, dp);
+		res = _mm_movemask_epi8(dp1);
+		if (res != 0xFFFF)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem > 3) { /* 4 entries per iteration */
+		__m128i dp = _mm_set1_epi16(dst_ports[0]);
+		__m128i dp1;
+		/* 64-bit load: read only the 4 entries under test, not 8. */
+		dp1 = _mm_loadl_epi64((__m128i *)&dst_ports[i]);
+		dp1 = _mm_cmpeq_epi16(dp1, dp);
+		dp1 = _mm_unpacklo_epi16(dp1, dp1); /* low 4 results -> 32 bit */
+		res = _mm_movemask_ps((__m128)dp1);
+		if (res != 0xF)
+			return BAD_PORT;
+
+		nb_elem -= 4;
+		i += 4;
+	}
+	/* Scalar tail: at most 3 entries remain here. */
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_SSE_H_ */
-- 
2.25.1