* [PATCH 1/5] examples/l3fwd: fix port group mask generation
@ 2022-08-29 9:44 pbhagavatula
2022-08-29 9:44 ` [PATCH 2/5] examples/l3fwd: split processing and send stages pbhagavatula
` (4 more replies)
0 siblings, 5 replies; 41+ messages in thread
From: pbhagavatula @ 2022-08-29 9:44 UTC (permalink / raw)
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix port group mask generation in altivec; vec_any_eq returns
0 or 1 while port_groupx4 expects a comparison mask result.
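For intuition, a minimal scalar sketch of the index port_groupx4
expects (illustrative only, not part of the patch): each 16-bit lane
of the vec_cmpeq() result is 0xFFFF on match and 0x0000 otherwise, so
masking lane k with (1 << k) assembles the 4-bit gptbl index, while
vec_any_eq() collapses the whole comparison into a single 0/1:

	/* cmp[k] is 0xFFFF when lane k compared equal, else 0. */
	static inline int
	port_group_index(const uint16_t cmp[4])
	{
		return (cmp[0] & 0x1) | (cmp[1] & 0x2) |
		       (cmp[2] & 0x4) | (cmp[3] & 0x8);
	}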
Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/common/altivec/port_group.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..7a6ef390ff 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
uint16_t u16[FWDSTEP + 1];
uint64_t u64;
} *pnum = (void *)pn;
+ union u_vec {
+ __vector unsigned short v_us;
+ unsigned short s[8];
+ };
+ union u_vec res;
int32_t v;
- v = vec_any_eq(dp1, dp2);
-
+ dp1 = vec_cmpeq(dp1, dp2);
+ res.v_us = dp1;
+ v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) |
+ (res.s[3] & 0x8);
/* update last port counter. */
lp[0] += gptbl[v].lpv;
--
2.25.1
* [PATCH 2/5] examples/l3fwd: split processing and send stages
2022-08-29 9:44 [PATCH 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
@ 2022-08-29 9:44 ` pbhagavatula
2022-08-29 9:44 ` [PATCH 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
` (3 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-08-29 9:44 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Split packet processing from the packet send stage, as the send
stage is not common to poll and event mode.
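As an illustration (a sketch built on the new signatures below; the
LPM flavour is shown), the split lets both modes share one
classification routine:

	uint16_t dst_port[MAX_PKT_BURST];

	/* Poll mode: classify only, then use the common send stage. */
	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port,
				  qconf, 0);
	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);

	/* Event mode (later patches): do_step3 = 1 also runs the
	 * per-packet rewrite (processx4_step3/process_packet) because
	 * there is no common send stage. */
	l3fwd_lpm_process_packets(vec->nb_elem, vec->mbufs, vec->port,
				  dst_port, qconf, 1);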
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return portid;
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t *dst_port, uint16_t portid,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i, j, pos;
- uint16_t dst_port[MAX_PKT_BURST];
/*
* Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
dst_port[j + i] = em_get_dst_port(qconf,
pkts_burst[j + i], portid);
}
+
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
}
- for (; j < nb_rx; j++)
+ for (; j < nb_rx; j++) {
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &pkts_burst[j]->port);
+ }
+}
- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
/*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+ for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *) + 1);
- }
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
}
continue;
}
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
ipv4_flag[j / FWDSTEP],
portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
}
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i = 0, j = 0;
- uint16_t dst_port[MAX_PKT_BURST];
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
void *));
}
-
for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
for (i = 0; i < FWDSTEP; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
&pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
}
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
&dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
j += FWDSTEP;
}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
void *));
j++;
}
-
j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
}
}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step2(qconf, dip[j / FWDSTEP],
ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
--
2.25.1
* [PATCH 3/5] examples/l3fwd: use lpm vector path for event vector
2022-08-29 9:44 [PATCH 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-08-29 9:44 ` [PATCH 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-08-29 9:44 ` pbhagavatula
2022-08-29 9:44 ` [PATCH 4/5] examples/l3fwd: use em " pbhagavatula
` (2 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-08-29 9:44 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the lpm vector path to process event vectors.
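For reference, a scalar sketch of the check the new SIMD
process_dst_port() helpers perform (the SSE/NEON/Altivec versions in
the diff below do the same comparison 8 or 4 lanes at a time):

	static inline uint16_t
	process_dst_port_scalar(const uint16_t *dst_ports, uint16_t nb_elem)
	{
		uint16_t i;

		for (i = 0; i < nb_elem; i++)
			if (dst_ports[i] != dst_ports[0])
				return BAD_PORT;
		return dst_ports[0];
	}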
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_altivec.h | 28 ++++++++++++++++
examples/l3fwd/l3fwd_event.h | 58 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 33 +++++++++----------
examples/l3fwd/l3fwd_neon.h | 43 +++++++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 ++++++++++++++++++++++++++
5 files changed, 190 insertions(+), 16 deletions(-)
diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..00a80225cd 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,32 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp = vec_splats((short)dst_ports[0]);
+ __vector unsigned short dp1;
+
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..26c3254004 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -14,6 +14,14 @@
#include "l3fwd.h"
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#endif
+
#define L3FWD_EVENT_SINGLE 0x1
#define L3FWD_EVENT_BURST 0x2
#define L3FWD_EVENT_TX_DIRECT 0x4
@@ -103,7 +111,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index d1b850dd5b..3f67ab01d4 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,22 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +456,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +464,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +486,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..60e6a310e0 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,47 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vminvq_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vminv_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */
--
2.25.1
* [PATCH 4/5] examples/l3fwd: use em vector path for event vector
2022-08-29 9:44 [PATCH 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-08-29 9:44 ` [PATCH 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-08-29 9:44 ` [PATCH 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-08-29 9:44 ` pbhagavatula
2022-08-29 9:44 ` [PATCH 5/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-08-29 9:44 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the em vector path to process event vectors.
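For intuition, a worked example (hypothetical destination ports) of
the post-lookup decision made by process_event_vector(), which this
patch reuses from the LPM path:

	/* dst_port = {3, 3, 3, 3} -> process_dst_port() == 3:
	 *   vector sent with attr_valid = 1, vec->port = 3, one Tx burst.
	 * dst_port = {3, 5, 3, 3} -> process_dst_port() == BAD_PORT:
	 *   attr_valid = 0, each mbuf keeps its own dst_port[i]. */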
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_em.c | 10 ++--
examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
examples/l3fwd/l3fwd_event.h | 21 --------
4 files changed, 35 insertions(+), 93 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 10be24c61d..ac475073d7 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
unsigned int lcore_id;
+ uint16_t *dst_ports;
if (event_p_id < 0)
return;
+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_ports == NULL)
+ return;
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
@@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
}
#if defined RTE_ARCH_X86 || defined __ARM_NEON
- l3fwd_em_process_event_vector(events[i].vec, lconf);
+ l3fwd_em_process_event_vector(events[i].vec, lconf,
+ dst_ports);
#else
l3fwd_em_no_opt_process_event_vector(events[i].vec,
lconf);
#endif
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_port)
{
- struct rte_mbuf **mbufs = vec->mbufs;
- uint16_t dst_port[MAX_PKT_BURST];
- int32_t i, j, n, pos;
-
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
- rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+ uint16_t i;
if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
- uint32_t pkt_type =
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
- uint32_t l3_type, tcp_or_udp;
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
- pkt_type &= mbufs[j + i]->packet_type;
-
- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
- i++, pos++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
- struct rte_ether_hdr *) +
- 1);
- }
-
- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else {
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port =
- em_get_dst_port(qconf, mbufs[j + i],
- mbufs[j + i]->port);
- process_packet(mbufs[j + i],
- &mbufs[j + i]->port);
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- continue;
- }
- processx4_step3(&mbufs[j], &dst_port[j]);
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port = dst_port[j + i];
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- }
-
- for (; j < vec->nb_elem; j++) {
- mbufs[j]->port =
- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
- process_packet(mbufs[j], &mbufs[j]->port);
- event_vector_attr_validate(vec, mbufs[j]);
- }
+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+ vec->port, qconf, 1);
+ else
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_em_process_packets(1, &vec->mbufs[i],
+ &dst_port[i],
+ vec->mbufs[i]->port, qconf, 1);
+
+ process_event_vector(vec, dst_port);
}
#endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,
for (i = 1, j = 0; j < nb_rx; i++, j++) {
struct rte_mbuf *mbuf = events[j]->mbuf;
+ uint16_t port;
if (i < nb_rx) {
rte_prefetch0(rte_pktmbuf_mtod(
events[i]->mbuf,
struct rte_ether_hdr *) + 1);
}
+ port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
}
}
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_ports)
{
+ const uint8_t attr_valid = vec->attr_valid;
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
- if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
if (j < vec->nb_elem)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
struct rte_ether_hdr *) +
1);
- mbufs[i]->port =
- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
- process_packet(mbufs[i], &mbufs[i]->port);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
}
+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != j; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &dst_ports[i]);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 26c3254004..2844cc4dd6 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -90,27 +90,6 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
- /* l3fwd application only changes mbuf port while processing */
- if (vec->attr_valid && (vec->port != mbuf->port))
- vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
- if (vec->attr_valid) {
- vec->queue = txq;
- } else {
- int i;
-
- for (i = 0; i < vec->nb_elem; i++)
- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
- }
-}
-
static inline uint16_t
filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
uint16_t nb_pkts)
--
2.25.1
* [PATCH 5/5] examples/l3fwd: fix event vector processing in fib
2022-08-29 9:44 [PATCH 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (2 preceding siblings ...)
2022-08-29 9:44 ` [PATCH 4/5] examples/l3fwd: use em " pbhagavatula
@ 2022-08-29 9:44 ` pbhagavatula
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-08-29 9:44 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix stack overflow when event vector size is greater than
MAX_BURST_SIZE.
Add missing mac swap and rfc1812 stage.
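The fix moves the per-burst scratch arrays from the stack into one
cache-aligned heap allocation sized by the runtime vector size;
schematically (a sketch of the layout built in the diff below):

	/*
	 * Single rte_zmalloc(), vector_size entries per array,
	 * laid out back to back:
	 *   ipv4_arr   vector_size * sizeof(uint32_t)
	 *   type_arr   vector_size * sizeof(uint8_t)
	 *   hopsv4     vector_size * sizeof(uint64_t)
	 *   hopsv6     vector_size * sizeof(uint64_t)
	 *   hops       vector_size * sizeof(uint16_t)
	 *   ipv6_arr   vector_size * sizeof(uint8_t *)
	 *   ipv6 data  vector_size * RTE_FIB6_IPV6_ADDR_SIZE bytes
	 */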
Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_fib.c | 85 +++++++++++++++++++++++++++-----------
1 file changed, 62 insertions(+), 23 deletions(-)
diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index e02e4b3f5a..80f0330c69 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -261,7 +261,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
uint32_t ipv4_arr[MAX_PKT_BURST];
uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
- uint16_t nh;
+ uint16_t nh, hops[MAX_PKT_BURST];
uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +350,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- events[i].mbuf->port = nh;
+ hops[i] = nh != FIB_DEFAULT_HOP ?
+ nh :
+ events[i].mbuf->port;
+ process_packet(events[i].mbuf, &hops[i]);
+ events[i].mbuf->port = hops[i] != BAD_PORT ?
+ hops[i] :
+ events[i].mbuf->port;
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +424,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+ uint32_t *ipv4_arr, uint16_t *hops)
{
- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
uint32_t ipv4_arr_assem, ipv6_arr_assem;
struct rte_mbuf **mbufs = vec->mbufs;
- uint32_t ipv4_arr[MAX_PKT_BURST];
- uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
struct lcore_conf *lconf;
uint16_t nh;
@@ -463,16 +467,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
/* Lookup IPv6 hops if IPv6 packets are present. */
if (ipv6_cnt > 0)
- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
- hopsv6, ipv6_cnt);
-
- if (vec->attr_valid) {
- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
- if (nh != FIB_DEFAULT_HOP)
- vec->port = nh;
- else
- vec->attr_valid = 0;
- }
+ rte_fib6_lookup_bulk(
+ lconf->ipv6_lookup_struct,
+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+ ipv6_cnt);
/* Assign ports looked up in fib depending on IPv4 or IPv6 */
for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +479,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- mbufs[i]->port = nh;
- event_vector_attr_validate(vec, mbufs[i]);
+ hops[i] = nh;
+ else
+ hops[i] = vec->attr_valid ? vec->port :
+ vec->mbufs[i]->port;
}
+
+#if defined FIB_SEND_MULTI
+ uint16_t k;
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
}
static __rte_always_inline void
@@ -496,7 +511,32 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
const uint8_t event_d_id = evt_rsrc->event_d_id;
const uint16_t deq_len = evt_rsrc->deq_depth;
struct rte_event events[MAX_PKT_BURST];
+ uint8_t *type_arr, **ipv6_arr, *ptr;
int nb_enq = 0, nb_deq = 0, i;
+ uint64_t *hopsv4, *hopsv6;
+ uint32_t *ipv4_arr;
+ uint16_t *hops;
+ uintptr_t mem;
+
+ mem = (uintptr_t)rte_zmalloc(
+ "vector_fib",
+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+ evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (mem == 0)
+ return;
+ ipv4_arr = (uint32_t *)mem;
+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+ for (i = 0; i < evt_rsrc->vector_size; i++)
+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i];
if (event_p_id < 0)
return;
@@ -519,10 +559,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- fib_process_event_vector(events[i].vec);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ fib_process_event_vector(events[i].vec, type_arr,
+ ipv6_arr, hopsv4, hopsv6,
+ ipv4_arr, hops);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
--
2.25.1
* [PATCH v2 1/5] examples/l3fwd: fix port group mask generation
2022-08-29 9:44 [PATCH 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (3 preceding siblings ...)
2022-08-29 9:44 ` [PATCH 5/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-09-02 9:18 ` pbhagavatula
2022-09-02 9:18 ` [PATCH v2 2/5] examples/l3fwd: split processing and send stages pbhagavatula
` (5 more replies)
4 siblings, 6 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw)
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix port group mask generation in altivec; vec_any_eq returns
0 or 1 while port_groupx4 expects a comparison mask result.
Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v2 Changes:
- Fix PPC, RISC-V, aarch32 compilation.
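Within this patch the only delta from v1 is the explicit cast on the
vec_cmpeq() result; a hedged reading of why it helps: vec_cmpeq()
yields a vector bool type that stricter toolchains will not assign to
__vector unsigned short without a cast:

	dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);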
examples/common/altivec/port_group.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..592ef80b7f 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
uint16_t u16[FWDSTEP + 1];
uint64_t u64;
} *pnum = (void *)pn;
+ union u_vec {
+ __vector unsigned short v_us;
+ unsigned short s[8];
+ };
+ union u_vec res;
int32_t v;
- v = vec_any_eq(dp1, dp2);
-
+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
+ res.v_us = dp1;
+ v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) |
+ (res.s[3] & 0x8);
/* update last port counter. */
lp[0] += gptbl[v].lpv;
--
2.25.1
* [PATCH v2 2/5] examples/l3fwd: split processing and send stages
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
@ 2022-09-02 9:18 ` pbhagavatula
2022-09-02 9:18 ` [PATCH v2 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
` (4 subsequent siblings)
5 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Split packet processing from the packet send stage, as the send
stage is not common to poll and event mode.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return portid;
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t *dst_port, uint16_t portid,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i, j, pos;
- uint16_t dst_port[MAX_PKT_BURST];
/*
* Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
dst_port[j + i] = em_get_dst_port(qconf,
pkts_burst[j + i], portid);
}
+
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
}
- for (; j < nb_rx; j++)
+ for (; j < nb_rx; j++) {
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &pkts_burst[j]->port);
+ }
+}
- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
/*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+ for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *) + 1);
- }
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
}
continue;
}
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
ipv4_flag[j / FWDSTEP],
portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
}
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i = 0, j = 0;
- uint16_t dst_port[MAX_PKT_BURST];
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
void *));
}
-
for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
for (i = 0; i < FWDSTEP; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
&pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
}
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
&dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
j += FWDSTEP;
}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
void *));
j++;
}
-
j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
}
}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step2(qconf, dip[j / FWDSTEP],
ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
--
2.25.1
* [PATCH v2 3/5] examples/l3fwd: use lpm vector path for event vector
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-09-02 9:18 ` [PATCH v2 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-09-02 9:18 ` pbhagavatula
2022-09-02 9:18 ` [PATCH v2 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
` (3 subsequent siblings)
5 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the lpm vector path to process event vectors.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 38 ++++++++++--------
examples/l3fwd/l3fwd_neon.h | 45 +++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
5 files changed, 211 insertions(+), 16 deletions(-)
diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp1;
+ __vector unsigned short dp;
+
+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ int i;
+
+ for (i = 0; i < nb_elem; i++) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ }
+
+ return dst_ports[0];
+}
+#endif
+
static inline void
event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
{
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index d1b850dd5b..1652b7c470 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..4d98288707 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,49 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+#if defined(RTE_ARCH_ARM64)
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vminvq_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vminv_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+#endif
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */
--
2.25.1
* [PATCH v2 4/5] examples/l3fwd: fix event vector processing in fib
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-09-02 9:18 ` [PATCH v2 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-09-02 9:18 ` [PATCH v2 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-09-02 9:18 ` pbhagavatula
2022-09-02 9:18 ` [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
` (2 subsequent siblings)
5 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix stack overflow when event vector size is greater than
MAX_BURST_SIZE.
Add missing mac swap and rfc1812 stage.
Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_fib.c | 124 ++++++++++++++++++++++++++-----------
1 file changed, 87 insertions(+), 37 deletions(-)
diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index e02e4b3f5a..ada5d0d430 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -77,27 +77,38 @@ fib_parse_packet(struct rte_mbuf *mbuf,
*/
#if !defined FIB_SEND_MULTI
static inline void
-fib_send_single(int nb_tx, struct lcore_conf *qconf,
- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
{
- int32_t j;
struct rte_ether_hdr *eth_hdr;
- for (j = 0; j < nb_tx; j++) {
- /* Run rfc1812 if packet is ipv4 and checks enabled. */
+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
#if defined DO_RFC_1812_CHECKS
- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
- pkts_burst[j], struct rte_ether_hdr *) + 1),
- &hops[j], pkts_burst[j]->packet_type);
+ rfc1812_process(
+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
+ pkt, struct rte_ether_hdr *) +
+ 1),
+ hop, pkt->packet_type,
+ pkt->ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK);
#endif
- /* Set MAC addresses. */
- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *);
- *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[hops[j]];
- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
- &eth_hdr->src_addr);
+ /* Set MAC addresses. */
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+ *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[*hop];
+ rte_ether_addr_copy(&ports_eth_addr[*hop], &eth_hdr->src_addr);
+}
+
+static inline void
+fib_send_single(int nb_tx, struct lcore_conf *qconf,
+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+{
+ int32_t j;
+ for (j = 0; j < nb_tx; j++) {
+ process_packet(pkts_burst[j], &hops[j]);
+ if (hops[j] == BAD_PORT) {
+ rte_pktmbuf_free(pkts_burst[j]);
+ continue;
+ }
/* Send single packet. */
send_single_packet(qconf, pkts_burst[j], hops[j]);
}
@@ -261,7 +272,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
uint32_t ipv4_arr[MAX_PKT_BURST];
uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
- uint16_t nh;
+ uint16_t nh, hops[MAX_PKT_BURST];
uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +361,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- events[i].mbuf->port = nh;
+ hops[i] = nh != FIB_DEFAULT_HOP ?
+ nh :
+ events[i].mbuf->port;
+ process_packet(events[i].mbuf, &hops[i]);
+ events[i].mbuf->port = hops[i] != BAD_PORT ?
+ hops[i] :
+ events[i].mbuf->port;
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +435,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+ uint32_t *ipv4_arr, uint16_t *hops)
{
- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
uint32_t ipv4_arr_assem, ipv6_arr_assem;
struct rte_mbuf **mbufs = vec->mbufs;
- uint32_t ipv4_arr[MAX_PKT_BURST];
- uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
struct lcore_conf *lconf;
uint16_t nh;
@@ -463,16 +478,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
/* Lookup IPv6 hops if IPv6 packets are present. */
if (ipv6_cnt > 0)
- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
- hopsv6, ipv6_cnt);
-
- if (vec->attr_valid) {
- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
- if (nh != FIB_DEFAULT_HOP)
- vec->port = nh;
- else
- vec->attr_valid = 0;
- }
+ rte_fib6_lookup_bulk(
+ lconf->ipv6_lookup_struct,
+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+ ipv6_cnt);
/* Assign ports looked up in fib depending on IPv4 or IPv6 */
for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +490,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- mbufs[i]->port = nh;
- event_vector_attr_validate(vec, mbufs[i]);
+ hops[i] = nh;
+ else
+ hops[i] = vec->attr_valid ? vec->port :
+ vec->mbufs[i]->port;
}
+
+#if defined FIB_SEND_MULTI
+ uint16_t k;
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
}
static __rte_always_inline void
@@ -496,7 +522,32 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
const uint8_t event_d_id = evt_rsrc->event_d_id;
const uint16_t deq_len = evt_rsrc->deq_depth;
struct rte_event events[MAX_PKT_BURST];
+ uint8_t *type_arr, **ipv6_arr, *ptr;
int nb_enq = 0, nb_deq = 0, i;
+ uint64_t *hopsv4, *hopsv6;
+ uint32_t *ipv4_arr;
+ uint16_t *hops;
+ uintptr_t mem;
+
+ mem = (uintptr_t)rte_zmalloc(
+ "vector_fib",
+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+ evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (mem == 0)
+ return;
+ ipv4_arr = (uint32_t *)mem;
+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+ for (i = 0; i < evt_rsrc->vector_size; i++)
+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE * i];
if (event_p_id < 0)
return;
@@ -519,10 +570,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- fib_process_event_vector(events[i].vec);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ fib_process_event_vector(events[i].vec, type_arr,
+ ipv6_arr, hopsv4, hopsv6,
+ ipv4_arr, hops);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (2 preceding siblings ...)
2022-09-02 9:18 ` [PATCH v2 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-09-02 9:18 ` pbhagavatula
2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
5 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use em vector path to process event vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
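In outline, the vector entry point reduces to the following (annotated
excerpt of the l3fwd_em_hlm.h hunk below):
if (vec->attr_valid)
	/* one common input port: process the whole vector in one call */
	l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
				 vec->port, qconf, 1);
else
	/* mixed input ports: resolve each mbuf with its own port */
	for (i = 0; i < vec->nb_elem; i++)
		l3fwd_em_process_packets(1, &vec->mbufs[i], &dst_port[i],
					 vec->mbufs[i]->port, qconf, 1);

process_event_vector(vec, dst_port);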
examples/l3fwd/l3fwd_em.c | 12 +++--
examples/l3fwd/l3fwd_em.h | 29 +++++------
examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
examples/l3fwd/l3fwd_event.h | 21 --------
5 files changed, 47 insertions(+), 112 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 10be24c61d..e7b35cfbd9 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
unsigned int lcore_id;
+ uint16_t *dst_ports;
if (event_p_id < 0)
return;
+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_ports == NULL)
+ return;
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
@@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
}
#if defined RTE_ARCH_X86 || defined __ARM_NEON
- l3fwd_em_process_event_vector(events[i].vec, lconf);
+ l3fwd_em_process_event_vector(events[i].vec, lconf,
+ dst_ports);
#else
l3fwd_em_no_opt_process_event_vector(events[i].vec,
- lconf);
+ lconf, dst_ports);
#endif
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
index fe2ee59f6a..7d051fc076 100644
--- a/examples/l3fwd/l3fwd_em.h
+++ b/examples/l3fwd/l3fwd_em.h
@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
}
}
-static __rte_always_inline void
+static __rte_always_inline uint16_t
l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
{
struct rte_ether_hdr *eth_hdr;
@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
else
m->port = BAD_PORT;
+
+ return m->port;
}
/*
@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
static inline void
l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf,
+ uint16_t *dst_ports)
{
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i;
@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
- /* Process first packet to init vector attributes */
- l3fwd_em_simple_process(mbufs[0], qconf);
- if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
- }
-
/*
* Prefetch and forward already prefetched packets.
*/
- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
rte_prefetch0(
rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *));
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
}
/* Forward remaining prefetched packets */
- for (; i < vec->nb_elem; i++) {
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ for (; i < vec->nb_elem; i++)
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_port)
{
- struct rte_mbuf **mbufs = vec->mbufs;
- uint16_t dst_port[MAX_PKT_BURST];
- int32_t i, j, n, pos;
-
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
- rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+ uint16_t i;
if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
- uint32_t pkt_type =
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
- uint32_t l3_type, tcp_or_udp;
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
- pkt_type &= mbufs[j + i]->packet_type;
-
- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
- i++, pos++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
- struct rte_ether_hdr *) +
- 1);
- }
-
- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else {
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port =
- em_get_dst_port(qconf, mbufs[j + i],
- mbufs[j + i]->port);
- process_packet(mbufs[j + i],
- &mbufs[j + i]->port);
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- continue;
- }
- processx4_step3(&mbufs[j], &dst_port[j]);
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port = dst_port[j + i];
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- }
-
- for (; j < vec->nb_elem; j++) {
- mbufs[j]->port =
- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
- process_packet(mbufs[j], &mbufs[j]->port);
- event_vector_attr_validate(vec, mbufs[j]);
- }
+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+ vec->port, qconf, 1);
+ else
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_em_process_packets(1, &vec->mbufs[i],
+ &dst_port[i],
+ vec->mbufs[i]->port, qconf, 1);
+
+ process_event_vector(vec, dst_port);
}
#endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,
for (i = 1, j = 0; j < nb_rx; i++, j++) {
struct rte_mbuf *mbuf = events[j]->mbuf;
+ uint16_t port;
if (i < nb_rx) {
rte_prefetch0(rte_pktmbuf_mtod(
events[i]->mbuf,
struct rte_ether_hdr *) + 1);
}
+ port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
}
}
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_ports)
{
+ const uint8_t attr_valid = vec->attr_valid;
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
- if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
if (j < vec->nb_elem)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
struct rte_ether_hdr *) +
1);
- mbufs[i]->port =
- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
- process_packet(mbufs[i], &mbufs[i]->port);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
}
+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != j; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &dst_ports[i]);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 3fe38aada0..e21817c36b 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
}
#endif
-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
- /* l3fwd application only changes mbuf port while processing */
- if (vec->attr_valid && (vec->port != mbuf->port))
- vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
- if (vec->attr_valid) {
- vec->queue = txq;
- } else {
- int i;
-
- for (i = 0; i < vec->nb_elem; i++)
- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
- }
-}
-
static inline uint16_t
filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
uint16_t nb_pkts)
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH v2 1/5] examples/l3fwd: fix port group mask generation
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (3 preceding siblings ...)
2022-09-02 9:18 ` [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-09-08 18:33 ` David Christensen
2022-09-09 5:56 ` [EXT] " Pavan Nikhilesh Bhagavatula
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
5 siblings, 1 reply; 41+ messages in thread
From: David Christensen @ 2022-09-08 18:33 UTC (permalink / raw)
To: pbhagavatula, jerinj; +Cc: dev, stable
On 9/2/22 2:18 AM, pbhagavatula@marvell.com wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Fix port group mask generation in altivec, vec_any_eq returns
> 0 or 1 while port_groupx4 expects comparison mask result.
>
> Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
> Cc: stable@dpdk.org
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> v2 Changes:
> - Fix PPC, RISC-V, aarch32 compilation.
>
> examples/common/altivec/port_group.h | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
> index 5e209b02fa..592ef80b7f 100644
> --- a/examples/common/altivec/port_group.h
> +++ b/examples/common/altivec/port_group.h
> @@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
> uint16_t u16[FWDSTEP + 1];
> uint64_t u64;
> } *pnum = (void *)pn;
> + union u_vec {
> + __vector unsigned short v_us;
> + unsigned short s[8];
> + };
>
> + union u_vec res;
> int32_t v;
>
> - v = vec_any_eq(dp1, dp2);
> -
> + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
Altivec vec_cmpeq() is similar to Intel _mm_cmpeq_*(), so this looks
right to me.
> + res.v_us = dp1;
>
> + v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) |
> + (res.s[3] & 0x8);
This can be vectorized too. The Intel _mm_unpacklo_epi16() intrinsic
can be replaced with the following Altivec code:
extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
return (__m128i) vec_mergeh ((__v8hi)__A, (__v8hi)__B);
}
The Intel _mm_movemask_ps() intrinsic can be replaced with the following
Altivec implementation:
/* Creates a 4-bit mask from the most significant bits of the SPFP
values. */
extern __inline int __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_movemask_ps (__m128 __A)
{
__vector unsigned long long result;
static const __vector unsigned int perm_mask =
{
#ifdef __LITTLE_ENDIAN__
0x00204060, 0x80808080, 0x80808080, 0x80808080
#else
0x80808080, 0x80808080, 0x80808080, 0x00204060
#endif
};
result = ((__vector unsigned long long)
vec_vbpermq ((__vector unsigned char) __A,
(__vector unsigned char) perm_mask));
#ifdef __LITTLE_ENDIAN__
return result[1];
#else
return result[0];
#endif
}
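Combined for this use case, the replacement could look roughly like
this (untested sketch, little-endian path only; variable names follow
the patch):
dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
/* widen the low four 16-bit lanes, as _mm_unpacklo_epi16() would */
dp1 = vec_mergeh(dp1, dp1);
/* gather one bit per 32-bit lane, as _mm_movemask_ps() would */
result = (__vector unsigned long long)vec_vbpermq(
	(__vector unsigned char)dp1, (__vector unsigned char)perm_mask);
v = result[1];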
Dave
^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] Re: [PATCH v2 1/5] examples/l3fwd: fix port group mask generation
2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen
@ 2022-09-09 5:56 ` Pavan Nikhilesh Bhagavatula
0 siblings, 0 replies; 41+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2022-09-09 5:56 UTC (permalink / raw)
To: David Christensen, Jerin Jacob Kollanukkaran; +Cc: dev, stable
> On 9/2/22 2:18 AM, pbhagavatula@marvell.com wrote:
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Fix port group mask generation in altivec, vec_any_eq returns
> > 0 or 1 while port_groupx4 expects comparison mask result.
> >
> > Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on
> powerpc")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> > v2 Changes:
> > - Fix PPC, RISC-V, aarch32 compilation.
> >
> > examples/common/altivec/port_group.h | 11 +++++++++--
> > 1 file changed, 9 insertions(+), 2 deletions(-)
> >
> > diff --git a/examples/common/altivec/port_group.h
> b/examples/common/altivec/port_group.h
> > index 5e209b02fa..592ef80b7f 100644
> > --- a/examples/common/altivec/port_group.h
> > +++ b/examples/common/altivec/port_group.h
> > @@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t
> *lp,
> > uint16_t u16[FWDSTEP + 1];
> > uint64_t u64;
> > } *pnum = (void *)pn;
> > + union u_vec {
> > + __vector unsigned short v_us;
> > + unsigned short s[8];
> > + };
> >
> > + union u_vec res;
> > int32_t v;
> >
> > - v = vec_any_eq(dp1, dp2);
> > -
> > + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
>
> Altivec vec_cmpeq() is similar to Intel _mm_cmpeq_*(), so this looks
> right to me.
>
> > + res.v_us = dp1;
> >
> > + v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) |
> > + (res.s[3] & 0x8);
>
> This can be vectorized too. The Intel _mm_unpacklo_epi16() intrinsic
> can be replaced with the following Altivec code:
>
> extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
> {
> return (__m128i) vec_mergeh ((__v8hi)__A, (__v8hi)__B);
> }
>
> The Intel _mm_movemask_ps() intrinsic can be replaced with the following
> Altivec implementation:
>
> /* Creates a 4-bit mask from the most significant bits of the SPFP
> values. */
> extern __inline int __attribute__((__gnu_inline__, __always_inline__,
> __artificial__))
> _mm_movemask_ps (__m128 __A)
> {
> __vector unsigned long long result;
> static const __vector unsigned int perm_mask =
> {
> #ifdef __LITTLE_ENDIAN__
> 0x00204060, 0x80808080, 0x80808080, 0x80808080
> #else
> 0x80808080, 0x80808080, 0x80808080, 0x00204060
> #endif
> };
>
> result = ((__vector unsigned long long)
> vec_vbpermq ((__vector unsigned char) __A,
> (__vector unsigned char) perm_mask));
>
> #ifdef __LITTLE_ENDIAN__
> return result[1];
> #else
> return result[0];
> #endif
> }
>
Sure I will add this to the next version.
> Dave
Thanks,
Pavan.
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v3 1/5] examples/l3fwd: fix port group mask generation
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (4 preceding siblings ...)
2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen
@ 2022-09-11 18:12 ` pbhagavatula
2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula
` (4 more replies)
5 siblings, 5 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw)
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix port group mask generation in altivec, vec_any_eq returns
0 or 1 while port_groupx4 expects comparison mask result.
Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v3 Changes:
- PPC optimize port mask generation.
- Fix aarch32 compilation.
v2 Changes:
- Fix PPC, RISC-V, aarch32 compilation.
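For reference, a scalar model of the two behaviours (illustrative
only): vec_any_eq() collapses the comparison to one boolean, while
port_groupx4() needs one bit per lane so the gptbl[v] lookup can tell
which of the four ports repeat.
#include <stdint.h>

/* vec_any_eq() semantics: 1 if ANY lane matches, else 0. */
static int
any_eq(const uint16_t *a, const uint16_t *b, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (a[i] == b[i])
			return 1;
	return 0;
}

/* What port_groupx4() expects: bit i set iff lane i matches. */
static int
lane_mask(const uint16_t *a, const uint16_t *b, int n)
{
	int i, m = 0;

	for (i = 0; i < n; i++)
		m |= (a[i] == b[i]) << i;
	return m;
}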
examples/common/altivec/port_group.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..1c05bc025a 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
uint16_t u16[FWDSTEP + 1];
uint64_t u64;
} *pnum = (void *)pn;
-
+ __vector unsigned long long result;
+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080,
+ 0x80808080, 0x80808080};
int32_t v;
- v = vec_any_eq(dp1, dp2);
-
+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
+ dp1 = vec_mergeh(dp1, dp1);
+ result = (__vector unsigned long long)vec_vbpermq(
+ (__vector unsigned char)dp1, (__vector unsigned char)perm_mask);
+ v = result[1];
/* update last port counter. */
lp[0] += gptbl[v].lpv;
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v3 2/5] examples/l3fwd: split processing and send stages
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
@ 2022-09-11 18:12 ` pbhagavatula
2022-09-11 18:12 ` [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
` (3 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Split packet processing from packet send stage, as send stage
is not common for poll and event mode.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
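The shape of the split, as a simplified sketch (lookup_dst_port() and
rewrite_packet() are illustrative stand-ins for steps 1-2 and step 3;
the real helpers also batch by FWDSTEP):
#include <stdint.h>

#define MAX_PKT_BURST 32
struct pkt;	/* stand-in for struct rte_mbuf */

uint16_t lookup_dst_port(struct pkt *p);		/* steps 1-2, stub */
void rewrite_packet(struct pkt *p, uint16_t *port);	/* step 3, stub */
void send_packets_multi(struct pkt **p, uint16_t *dp, int n);	/* stub */

/* Processing stage: fill dst_port[]; run the per-packet rewrite only
 * when the caller has no separate send stage (event mode). */
static inline void
process_stage(struct pkt **pkts, int n, uint16_t *dst_port,
	      const uint8_t do_step3)
{
	int i;

	for (i = 0; i < n; i++) {
		dst_port[i] = lookup_dst_port(pkts[i]);
		if (do_step3)
			rewrite_packet(pkts[i], &dst_port[i]);
	}
}

/* Poll mode keeps the old behaviour: process with do_step3 = 0, then
 * burst-send, which performs the rewrite while grouping by port. */
static inline void
poll_path(struct pkt **pkts, int n)
{
	uint16_t dst_port[MAX_PKT_BURST];

	process_stage(pkts, n, dst_port, 0);
	send_packets_multi(pkts, dst_port, n);
}
Event mode instead calls the processing stage with do_step3 = 1 and
hands dst_port[] to the vector send logic.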
examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return portid;
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t *dst_port, uint16_t portid,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i, j, pos;
- uint16_t dst_port[MAX_PKT_BURST];
/*
* Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
dst_port[j + i] = em_get_dst_port(qconf,
pkts_burst[j + i], portid);
}
+
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
}
- for (; j < nb_rx; j++)
+ for (; j < nb_rx; j++) {
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
+ }
+}
- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
/*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+ for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *) + 1);
- }
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
}
continue;
}
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
ipv4_flag[j / FWDSTEP],
portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
}
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i = 0, j = 0;
- uint16_t dst_port[MAX_PKT_BURST];
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
void *));
}
-
for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
for (i = 0; i < FWDSTEP; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
&pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
}
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
&dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
j += FWDSTEP;
}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
void *));
j++;
}
-
j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
}
}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step2(qconf, dip[j / FWDSTEP],
ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-09-11 18:12 ` pbhagavatula
2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
` (2 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use lpm vector path to process event vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
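A quick usage sketch of the process_dst_port() contract introduced
here (standalone; the BAD_PORT value is assumed to match l3fwd's
(uint16_t)-1, and the scalar body is the generic fallback from the
diff below):
#include <assert.h>
#include <stdint.h>

#define BAD_PORT ((uint16_t)-1)

static inline uint16_t
process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
{
	int i;

	for (i = 0; i < nb_elem; i++)
		if (dst_ports[i] != dst_ports[0])
			return BAD_PORT;
	return dst_ports[0];
}

int
main(void)
{
	uint16_t same[4] = {3, 3, 3, 3};
	uint16_t mixed[4] = {3, 3, 7, 3};

	/* all agree: vector can carry a single port/queue attribute */
	assert(process_dst_port(same, 4) == 3);
	/* disagree: fall back to per-mbuf ports and per-mbuf txq */
	assert(process_dst_port(mixed, 4) == BAD_PORT);
	return 0;
}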
examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 38 ++++++++++--------
examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
5 files changed, 213 insertions(+), 16 deletions(-)
diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp1;
+ __vector unsigned short dp;
+
+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ int i;
+
+ for (i = 0; i < nb_elem; i++) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ }
+
+ return dst_ports[0];
+}
+#endif
+
static inline void
event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
{
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index d1b850dd5b..1652b7c470 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..bf365341fb 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0;
+
+#if defined(RTE_ARCH_ARM64)
+ uint16_t res;
+
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vminvq_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vminv_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+#endif
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-09-11 18:12 ` [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-09-11 18:12 ` pbhagavatula
2022-10-07 20:03 ` [EXT] " Shijith Thotton
2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
4 siblings, 1 reply; 41+ messages in thread
From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix a stack overflow that occurs when the event vector size is
greater than MAX_BURST_SIZE.
Add the missing MAC swap and RFC1812 stages.
Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
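The per-vector scratch now comes from one rte_zmalloc() carved into
back-to-back arrays; below is a minimal standalone model of that
layout (the IPv6 pointer table is omitted for brevity, and
vector_size is assumed to be a power of two >= 8 so every region
stays naturally aligned):
#include <stdint.h>
#include <stdlib.h>

#define IPV6_ADDR_SIZE 16	/* RTE_FIB6_IPV6_ADDR_SIZE */

static uint8_t *
alloc_scratch(uint16_t vector_size, uint32_t **ipv4_arr,
	      uint8_t **type_arr, uint64_t **hopsv4, uint64_t **hopsv6,
	      uint16_t **hops, uint8_t **ipv6_flat)
{
	size_t sz = (sizeof(uint32_t) + sizeof(uint8_t) +
		     2 * sizeof(uint64_t) + sizeof(uint16_t) +
		     IPV6_ADDR_SIZE) * vector_size;
	uint8_t *mem = calloc(1, sz);	/* rte_zmalloc() in the patch */

	if (mem == NULL)
		return NULL;
	*ipv4_arr = (uint32_t *)mem;
	*type_arr = (uint8_t *)&(*ipv4_arr)[vector_size];
	*hopsv4 = (uint64_t *)&(*type_arr)[vector_size];
	*hopsv6 = (uint64_t *)&(*hopsv4)[vector_size];
	*hops = (uint16_t *)&(*hopsv6)[vector_size];
	*ipv6_flat = (uint8_t *)&(*hops)[vector_size];	/* 16 B per pkt */
	return mem;
}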
examples/l3fwd/l3fwd_fib.c | 123 ++++++++++++++++++++++++++-----------
1 file changed, 86 insertions(+), 37 deletions(-)
diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index e02e4b3f5a..c4a45bc7f3 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf,
*/
#if !defined FIB_SEND_MULTI
static inline void
-fib_send_single(int nb_tx, struct lcore_conf *qconf,
- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
{
- int32_t j;
struct rte_ether_hdr *eth_hdr;
- for (j = 0; j < nb_tx; j++) {
- /* Run rfc1812 if packet is ipv4 and checks enabled. */
+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
#if defined DO_RFC_1812_CHECKS
- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
- pkts_burst[j], struct rte_ether_hdr *) + 1),
- &hops[j], pkts_burst[j]->packet_type);
+ rfc1812_process(
+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
+ pkt, struct rte_ether_hdr *) +
+ 1),
+ hop, pkt->packet_type);
#endif
- /* Set MAC addresses. */
- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *);
- *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]];
- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
- ð_hdr->src_addr);
+ /* Set MAC addresses. */
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+ *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop];
+ rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr);
+}
+
+static inline void
+fib_send_single(int nb_tx, struct lcore_conf *qconf,
+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+{
+ int32_t j;
+ for (j = 0; j < nb_tx; j++) {
+ process_packet(pkts_burst[j], &hops[j]);
+ if (hops[j] == BAD_PORT) {
+ rte_pktmbuf_free(pkts_burst[j]);
+ continue;
+ }
/* Send single packet. */
send_single_packet(qconf, pkts_burst[j], hops[j]);
}
@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
uint32_t ipv4_arr[MAX_PKT_BURST];
uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
- uint16_t nh;
+ uint16_t nh, hops[MAX_PKT_BURST];
uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
- if (nh != FIB_DEFAULT_HOP)
- events[i].mbuf->port = nh;
+ hops[i] = nh != FIB_DEFAULT_HOP ?
+ nh :
+ events[i].mbuf->port;
+ process_packet(events[i].mbuf, &hops[i]);
+ events[i].mbuf->port = hops[i] != BAD_PORT ?
+ hops[i] :
+ events[i].mbuf->port;
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+ uint32_t *ipv4_arr, uint16_t *hops)
{
- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
uint32_t ipv4_arr_assem, ipv6_arr_assem;
struct rte_mbuf **mbufs = vec->mbufs;
- uint32_t ipv4_arr[MAX_PKT_BURST];
- uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
struct lcore_conf *lconf;
uint16_t nh;
@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
/* Lookup IPv6 hops if IPv6 packets are present. */
if (ipv6_cnt > 0)
- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
- hopsv6, ipv6_cnt);
-
- if (vec->attr_valid) {
- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
- if (nh != FIB_DEFAULT_HOP)
- vec->port = nh;
- else
- vec->attr_valid = 0;
- }
+ rte_fib6_lookup_bulk(
+ lconf->ipv6_lookup_struct,
+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+ ipv6_cnt);
/* Assign ports looked up in fib depending on IPv4 or IPv6 */
for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- mbufs[i]->port = nh;
- event_vector_attr_validate(vec, mbufs[i]);
+ hops[i] = nh;
+ else
+ hops[i] = vec->attr_valid ? vec->port :
+ vec->mbufs[i]->port;
}
+
+#if defined FIB_SEND_MULTI
+ uint16_t k;
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
}
static __rte_always_inline void
@@ -496,7 +521,32 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
const uint8_t event_d_id = evt_rsrc->event_d_id;
const uint16_t deq_len = evt_rsrc->deq_depth;
struct rte_event events[MAX_PKT_BURST];
+ uint8_t *type_arr, **ipv6_arr, *ptr;
int nb_enq = 0, nb_deq = 0, i;
+ uint64_t *hopsv4, *hopsv6;
+ uint32_t *ipv4_arr;
+ uint16_t *hops;
+ uintptr_t mem;
+
+ mem = (uintptr_t)rte_zmalloc(
+ "vector_fib",
+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+ evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (mem == 0)
+ return;
+ ipv4_arr = (uint32_t *)mem;
+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+ for (i = 0; i < evt_rsrc->vector_size; i++)
+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE * i];
if (event_p_id < 0)
return;
@@ -519,10 +569,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- fib_process_event_vector(events[i].vec);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ fib_process_event_vector(events[i].vec, type_arr,
+ ipv6_arr, hopsv4, hopsv6,
+ ipv4_arr, hops);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
` (2 preceding siblings ...)
2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-09-11 18:12 ` pbhagavatula
2022-10-07 20:01 ` [EXT] " Shijith Thotton
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
4 siblings, 1 reply; 41+ messages in thread
From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use em vector path to process event vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
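One detail in the sequential path: the lookup-and-rewrite can now
yield BAD_PORT, so the original port is saved and restored instead of
being clobbered (annotated excerpt from the l3fwd_em_process_events()
hunk below):
port = mbuf->port;			/* remember the input port */
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);	/* may set BAD_PORT */
if (mbuf->port == BAD_PORT)
	mbuf->port = port;		/* keep a valid port on failure */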
examples/l3fwd/l3fwd_em.c | 12 +++--
examples/l3fwd/l3fwd_em.h | 29 +++++------
examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
examples/l3fwd/l3fwd_event.h | 21 --------
5 files changed, 47 insertions(+), 112 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 10be24c61d..e7b35cfbd9 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
unsigned int lcore_id;
+ uint16_t *dst_ports;
if (event_p_id < 0)
return;
+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_ports == NULL)
+ return;
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
@@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
}
#if defined RTE_ARCH_X86 || defined __ARM_NEON
- l3fwd_em_process_event_vector(events[i].vec, lconf);
+ l3fwd_em_process_event_vector(events[i].vec, lconf,
+ dst_ports);
#else
l3fwd_em_no_opt_process_event_vector(events[i].vec,
- lconf);
+ lconf, dst_ports);
#endif
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
index fe2ee59f6a..7d051fc076 100644
--- a/examples/l3fwd/l3fwd_em.h
+++ b/examples/l3fwd/l3fwd_em.h
@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
}
}
-static __rte_always_inline void
+static __rte_always_inline uint16_t
l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
{
struct rte_ether_hdr *eth_hdr;
@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
else
m->port = BAD_PORT;
+
+ return m->port;
}
/*
@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
static inline void
l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf,
+ uint16_t *dst_ports)
{
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i;
@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
- /* Process first packet to init vector attributes */
- l3fwd_em_simple_process(mbufs[0], qconf);
- if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
- }
-
/*
* Prefetch and forward already prefetched packets.
*/
- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
rte_prefetch0(
rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *));
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
}
/* Forward remaining prefetched packets */
- for (; i < vec->nb_elem; i++) {
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ for (; i < vec->nb_elem; i++)
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_port)
{
- struct rte_mbuf **mbufs = vec->mbufs;
- uint16_t dst_port[MAX_PKT_BURST];
- int32_t i, j, n, pos;
-
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
- rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+ uint16_t i;
if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
- uint32_t pkt_type =
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
- uint32_t l3_type, tcp_or_udp;
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
- pkt_type &= mbufs[j + i]->packet_type;
-
- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
- i++, pos++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
- struct rte_ether_hdr *) +
- 1);
- }
-
- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else {
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port =
- em_get_dst_port(qconf, mbufs[j + i],
- mbufs[j + i]->port);
- process_packet(mbufs[j + i],
- &mbufs[j + i]->port);
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- continue;
- }
- processx4_step3(&mbufs[j], &dst_port[j]);
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port = dst_port[j + i];
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- }
-
- for (; j < vec->nb_elem; j++) {
- mbufs[j]->port =
- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
- process_packet(mbufs[j], &mbufs[j]->port);
- event_vector_attr_validate(vec, mbufs[j]);
- }
+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+ vec->port, qconf, 1);
+ else
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_em_process_packets(1, &vec->mbufs[i],
+ &dst_port[i],
+ vec->mbufs[i]->port, qconf, 1);
+
+ process_event_vector(vec, dst_port);
}
#endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,
for (i = 1, j = 0; j < nb_rx; i++, j++) {
struct rte_mbuf *mbuf = events[j]->mbuf;
+ uint16_t port;
if (i < nb_rx) {
rte_prefetch0(rte_pktmbuf_mtod(
events[i]->mbuf,
struct rte_ether_hdr *) + 1);
}
+ port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
}
}
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_ports)
{
+ const uint8_t attr_valid = vec->attr_valid;
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
- if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
if (j < vec->nb_elem)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
struct rte_ether_hdr *) +
1);
- mbufs[i]->port =
- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
- process_packet(mbufs[i], &mbufs[i]->port);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
}
+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != j; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &dst_ports[i]);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 3fe38aada0..e21817c36b 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
}
#endif
-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
- /* l3fwd application only changes mbuf port while processing */
- if (vec->attr_valid && (vec->port != mbuf->port))
- vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
- if (vec->attr_valid) {
- vec->queue = txq;
- } else {
- int i;
-
- for (i = 0; i < vec->nb_elem; i++)
- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
- }
-}
-
static inline uint16_t
filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
uint16_t nb_pkts)
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector
2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-10-07 20:01 ` Shijith Thotton
0 siblings, 0 replies; 41+ messages in thread
From: Shijith Thotton @ 2022-10-07 20:01 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran
Cc: dev, Pavan Nikhilesh Bhagavatula
>Use em vector path to process event vector.
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>---
> examples/l3fwd/l3fwd_em.c | 12 +++--
> examples/l3fwd/l3fwd_em.h | 29 +++++------
> examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
> examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
> examples/l3fwd/l3fwd_event.h | 21 --------
> 5 files changed, 47 insertions(+), 112 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
>index 10be24c61d..e7b35cfbd9 100644
>--- a/examples/l3fwd/l3fwd_em.c
>+++ b/examples/l3fwd/l3fwd_em.c
>@@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> int i, nb_enq = 0, nb_deq = 0;
> struct lcore_conf *lconf;
> unsigned int lcore_id;
>+ uint16_t *dst_ports;
>
> if (event_p_id < 0)
> return;
>
>+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
>+ RTE_CACHE_LINE_SIZE);
Free missing: dst_ports allocated above is never released; add rte_free() before the worker returns.
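A minimal sketch of the intended cleanup, mirroring what the other worker
loops do on exit (names as in this patch):

	dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
				RTE_CACHE_LINE_SIZE);
	...
	l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
				   nb_deq, 1);
	rte_free(dst_ports);	/* release the per-lcore scratch on exit */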
>+ if (dst_ports == NULL)
>+ return;
> lcore_id = rte_lcore_id();
> lconf = &lcore_conf[lcore_id];
>
>@@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> }
>
> #if defined RTE_ARCH_X86 || defined __ARM_NEON
>- l3fwd_em_process_event_vector(events[i].vec, lconf);
>+ l3fwd_em_process_event_vector(events[i].vec, lconf,
>+ dst_ports);
> #else
> l3fwd_em_no_opt_process_event_vector(events[i].vec,
>- lconf);
>+ lconf, dst_ports);
> #endif
>- if (flags & L3FWD_EVENT_TX_DIRECT)
>- event_vector_txq_set(events[i].vec, 0);
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
>index fe2ee59f6a..7d051fc076 100644
>--- a/examples/l3fwd/l3fwd_em.h
>+++ b/examples/l3fwd/l3fwd_em.h
>@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t
>portid,
> }
> }
>
>-static __rte_always_inline void
>+static __rte_always_inline uint16_t
> l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
> {
> struct rte_ether_hdr *eth_hdr;
>@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct
>lcore_conf *qconf)
> m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
> else
> m->port = BAD_PORT;
>+
>+ return m->port;
> }
>
> /*
>@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct
>rte_event **events,
>
> static inline void
> l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
>- struct lcore_conf *qconf)
>+ struct lcore_conf *qconf,
>+ uint16_t *dst_ports)
> {
> struct rte_mbuf **mbufs = vec->mbufs;
> int32_t i;
>@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct
>rte_event_vector *vec,
> for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
> rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
>
>- /* Process first packet to init vector attributes */
>- l3fwd_em_simple_process(mbufs[0], qconf);
>- if (vec->attr_valid) {
>- if (mbufs[0]->port != BAD_PORT)
>- vec->port = mbufs[0]->port;
>- else
>- vec->attr_valid = 0;
>- }
>-
> /*
> * Prefetch and forward already prefetched packets.
> */
>- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
>+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
> rte_prefetch0(
> rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void
>*));
>- l3fwd_em_simple_process(mbufs[i], qconf);
>- event_vector_attr_validate(vec, mbufs[i]);
>+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
> }
>
> /* Forward remaining prefetched packets */
>- for (; i < vec->nb_elem; i++) {
>- l3fwd_em_simple_process(mbufs[i], qconf);
>- event_vector_attr_validate(vec, mbufs[i]);
>- }
>+ for (; i < vec->nb_elem; i++)
>+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
>+
>+ process_event_vector(vec, dst_ports);
> }
>
> #endif /* __L3FWD_EM_H__ */
>diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
>index 12b997e477..2e11eefad7 100644
>--- a/examples/l3fwd/l3fwd_em_hlm.h
>+++ b/examples/l3fwd/l3fwd_em_hlm.h
>@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event
>**ev,
>
> static inline void
> l3fwd_em_process_event_vector(struct rte_event_vector *vec,
>- struct lcore_conf *qconf)
>+ struct lcore_conf *qconf, uint16_t *dst_port)
> {
>- struct rte_mbuf **mbufs = vec->mbufs;
>- uint16_t dst_port[MAX_PKT_BURST];
>- int32_t i, j, n, pos;
>-
>- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
>- rte_prefetch0(
>- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
>+ uint16_t i;
>
> if (vec->attr_valid)
>- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
>-
>- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
>- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
>- uint32_t pkt_type =
>- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP |
>RTE_PTYPE_L4_UDP;
>- uint32_t l3_type, tcp_or_udp;
>-
>- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
>- pkt_type &= mbufs[j + i]->packet_type;
>-
>- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
>- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP |
>RTE_PTYPE_L4_UDP);
>-
>- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
>- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
>- i++, pos++) {
>- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
>- struct rte_ether_hdr *) +
>- 1);
>- }
>-
>- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
>- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
>- &dst_port[j]);
>- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
>- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
>- &dst_port[j]);
>- } else {
>- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
>- mbufs[j + i]->port =
>- em_get_dst_port(qconf, mbufs[j + i],
>- mbufs[j + i]->port);
>- process_packet(mbufs[j + i],
>- &mbufs[j + i]->port);
>- event_vector_attr_validate(vec, mbufs[j + i]);
>- }
>- continue;
>- }
>- processx4_step3(&mbufs[j], &dst_port[j]);
>-
>- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
>- mbufs[j + i]->port = dst_port[j + i];
>- event_vector_attr_validate(vec, mbufs[j + i]);
>- }
>- }
>-
>- for (; j < vec->nb_elem; j++) {
>- mbufs[j]->port =
>- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
>- process_packet(mbufs[j], &mbufs[j]->port);
>- event_vector_attr_validate(vec, mbufs[j]);
>- }
>+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs,
>dst_port,
>+ vec->port, qconf, 1);
>+ else
>+ for (i = 0; i < vec->nb_elem; i++)
>+ l3fwd_em_process_packets(1, &vec->mbufs[i],
>+ &dst_port[i],
>+ vec->mbufs[i]->port, qconf, 1);
>+
>+ process_event_vector(vec, dst_port);
> }
>
> #endif /* __L3FWD_EM_HLM_H__ */
>diff --git a/examples/l3fwd/l3fwd_em_sequential.h
>b/examples/l3fwd/l3fwd_em_sequential.h
>index d2f75edb8a..067f23889a 100644
>--- a/examples/l3fwd/l3fwd_em_sequential.h
>+++ b/examples/l3fwd/l3fwd_em_sequential.h
>@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event
>**events,
>
> for (i = 1, j = 0; j < nb_rx; i++, j++) {
> struct rte_mbuf *mbuf = events[j]->mbuf;
>+ uint16_t port;
>
> if (i < nb_rx) {
> rte_prefetch0(rte_pktmbuf_mtod(
> events[i]->mbuf,
> struct rte_ether_hdr *) + 1);
> }
>+ port = mbuf->port;
> mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
> process_packet(mbuf, &mbuf->port);
>+ if (mbuf->port == BAD_PORT)
>+ mbuf->port = port;
> }
> }
>
> static inline void
> l3fwd_em_process_event_vector(struct rte_event_vector *vec,
>- struct lcore_conf *qconf)
>+ struct lcore_conf *qconf, uint16_t *dst_ports)
> {
>+ const uint8_t attr_valid = vec->attr_valid;
> struct rte_mbuf **mbufs = vec->mbufs;
> int32_t i, j;
>
> rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
>
>- if (vec->attr_valid)
>- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
>-
> for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
> if (j < vec->nb_elem)
> rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
> struct rte_ether_hdr *) +
> 1);
>- mbufs[i]->port =
>- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
>- process_packet(mbufs[i], &mbufs[i]->port);
>- event_vector_attr_validate(vec, mbufs[i]);
>+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
>+ attr_valid ? vec->port :
>+ mbufs[i]->port);
> }
>+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
>+
>+ for (i = 0; i != j; i += FWDSTEP)
>+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
>+ for (; i < vec->nb_elem; i++)
>+ process_packet(vec->mbufs[i], &dst_ports[i]);
>+
>+ process_event_vector(vec, dst_ports);
> }
>
> #endif /* __L3FWD_EM_SEQUENTIAL_H__ */
>diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
>index 3fe38aada0..e21817c36b 100644
>--- a/examples/l3fwd/l3fwd_event.h
>+++ b/examples/l3fwd/l3fwd_event.h
>@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t
>nb_elem)
> }
> #endif
>
>-static inline void
>-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf
>*mbuf)
>-{
>- /* l3fwd application only changes mbuf port while processing */
>- if (vec->attr_valid && (vec->port != mbuf->port))
>- vec->attr_valid = 0;
>-}
>-
>-static inline void
>-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
>-{
>- if (vec->attr_valid) {
>- vec->queue = txq;
>- } else {
>- int i;
>-
>- for (i = 0; i < vec->nb_elem; i++)
>- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
>- }
>-}
>-
> static inline uint16_t
> filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
> uint16_t nb_pkts)
>--
>2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib
2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-10-07 20:03 ` Shijith Thotton
0 siblings, 0 replies; 41+ messages in thread
From: Shijith Thotton @ 2022-10-07 20:03 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran
Cc: dev, Pavan Nikhilesh Bhagavatula
>
>Fix stack overflow when event vector size is greater than
>MAX_BURST_SIZE.
>Add missing mac swap and rfc1812 stage.
>
>Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>---
> examples/l3fwd/l3fwd_fib.c | 123 ++++++++++++++++++++++++++-----------
> 1 file changed, 86 insertions(+), 37 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
>index e02e4b3f5a..c4a45bc7f3 100644
>--- a/examples/l3fwd/l3fwd_fib.c
>+++ b/examples/l3fwd/l3fwd_fib.c
>@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf,
> */
> #if !defined FIB_SEND_MULTI
> static inline void
>-fib_send_single(int nb_tx, struct lcore_conf *qconf,
>- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
>+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
> {
>- int32_t j;
> struct rte_ether_hdr *eth_hdr;
>
>- for (j = 0; j < nb_tx; j++) {
>- /* Run rfc1812 if packet is ipv4 and checks enabled. */
>+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
> #if defined DO_RFC_1812_CHECKS
>- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
>- pkts_burst[j], struct rte_ether_hdr *) + 1),
>- &hops[j], pkts_burst[j]->packet_type);
>+ rfc1812_process(
>+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
>+ pkt, struct rte_ether_hdr *) +
>+ 1),
>+ hop, pkt->packet_type);
> #endif
>
>- /* Set MAC addresses. */
>- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
>- struct rte_ether_hdr *);
>- *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[hops[j]];
>- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
>- &eth_hdr->src_addr);
>+ /* Set MAC addresses. */
>+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
>+ *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[*hop];
>+ rte_ether_addr_copy(&ports_eth_addr[*hop], &eth_hdr->src_addr);
>+}
>+
>+static inline void
>+fib_send_single(int nb_tx, struct lcore_conf *qconf,
>+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
>+{
>+ int32_t j;
>
>+ for (j = 0; j < nb_tx; j++) {
>+ process_packet(pkts_burst[j], &hops[j]);
>+ if (hops[j] == BAD_PORT) {
>+ rte_pktmbuf_free(pkts_burst[j]);
>+ continue;
>+ }
> /* Send single packet. */
> send_single_packet(qconf, pkts_burst[j], hops[j]);
> }
>@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
> uint32_t ipv4_arr[MAX_PKT_BURST];
> uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
> uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
>- uint16_t nh;
>+ uint16_t nh, hops[MAX_PKT_BURST];
> uint8_t type_arr[MAX_PKT_BURST];
> uint32_t ipv4_cnt, ipv6_cnt;
> uint32_t ipv4_arr_assem, ipv6_arr_assem;
>@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
> else
> nh = (uint16_t)hopsv6[ipv6_arr_assem++];
> if (nh != FIB_DEFAULT_HOP)
>- events[i].mbuf->port = nh;
>+ hops[i] = nh != FIB_DEFAULT_HOP ?
>+ nh :
>+ events[i].mbuf->port;
>+ process_packet(events[i].mbuf, &hops[i]);
>+ events[i].mbuf->port = hops[i] != BAD_PORT ?
>+ hops[i] :
>+ events[i].mbuf->port;
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void
>*dummy)
> }
>
> static __rte_always_inline void
>-fib_process_event_vector(struct rte_event_vector *vec)
>+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
>+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
>+ uint32_t *ipv4_arr, uint16_t *hops)
> {
>- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
>- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
> uint32_t ipv4_arr_assem, ipv6_arr_assem;
> struct rte_mbuf **mbufs = vec->mbufs;
>- uint32_t ipv4_arr[MAX_PKT_BURST];
>- uint8_t type_arr[MAX_PKT_BURST];
> uint32_t ipv4_cnt, ipv6_cnt;
> struct lcore_conf *lconf;
> uint16_t nh;
>@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector
>*vec)
>
> /* Lookup IPv6 hops if IPv6 packets are present. */
> if (ipv6_cnt > 0)
>- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
>- hopsv6, ipv6_cnt);
>-
>- if (vec->attr_valid) {
>- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
>- if (nh != FIB_DEFAULT_HOP)
>- vec->port = nh;
>- else
>- vec->attr_valid = 0;
>- }
>+ rte_fib6_lookup_bulk(
>+ lconf->ipv6_lookup_struct,
>+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
>+ ipv6_cnt);
>
> /* Assign ports looked up in fib depending on IPv4 or IPv6 */
> for (i = 0; i < vec->nb_elem; i++) {
>@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
> else
> nh = (uint16_t)hopsv6[ipv6_arr_assem++];
> if (nh != FIB_DEFAULT_HOP)
>- mbufs[i]->port = nh;
>- event_vector_attr_validate(vec, mbufs[i]);
>+ hops[i] = nh;
>+ else
>+ hops[i] = vec->attr_valid ? vec->port :
>+ vec->mbufs[i]->port;
> }
>+
>+#if defined FIB_SEND_MULTI
>+ uint16_t k;
>+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
>+
>+ for (i = 0; i != k; i += FWDSTEP)
>+ processx4_step3(&vec->mbufs[i], &hops[i]);
>+ for (; i < vec->nb_elem; i++)
>+ process_packet(vec->mbufs[i], &hops[i]);
>+#else
>+ for (i = 0; i < vec->nb_elem; i++)
>+ process_packet(vec->mbufs[i], &hops[i]);
>+#endif
>+
>+ process_event_vector(vec, hops);
> }
>
> static __rte_always_inline void
>@@ -496,7 +521,32 @@ fib_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> const uint8_t event_d_id = evt_rsrc->event_d_id;
> const uint16_t deq_len = evt_rsrc->deq_depth;
> struct rte_event events[MAX_PKT_BURST];
>+ uint8_t *type_arr, **ipv6_arr, *ptr;
> int nb_enq = 0, nb_deq = 0, i;
>+ uint64_t *hopsv4, *hopsv6;
>+ uint32_t *ipv4_arr;
>+ uint16_t *hops;
>+ uintptr_t mem;
>+
>+ mem = (uintptr_t)rte_zmalloc(
>+ "vector_fib",
>+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
>+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
>+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
>+ evt_rsrc->vector_size,
>+ RTE_CACHE_LINE_SIZE);
Free missing: this scratch allocation is never released; add rte_free() on the exit paths.
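A minimal sketch of the matching release; since mem is held as a
uintptr_t, a cast back to a pointer is assumed here:

	l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
				   nb_deq, 1);
	rte_free((void *)mem);	/* frees all of the carved arrays at once */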
>+ if (mem == 0)
>+ return;
>+ ipv4_arr = (uint32_t *)mem;
>+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
>+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
>+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
>+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
>+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
>+
>+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
>+ for (i = 0; i < evt_rsrc->vector_size; i++)
>+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i];
>
> if (event_p_id < 0)
> return;
>@@ -519,10 +569,9 @@ fib_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> events[i].op = RTE_EVENT_OP_FORWARD;
> }
>
>- fib_process_event_vector(events[i].vec);
>-
>- if (flags & L3FWD_EVENT_TX_DIRECT)
>- event_vector_txq_set(events[i].vec, 0);
>+ fib_process_event_vector(events[i].vec, type_arr,
>+ ipv6_arr, hopsv4, hopsv6,
>+ ipv4_arr, hops);
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>--
>2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v4 1/5] examples/l3fwd: fix port group mask generation
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
` (3 preceding siblings ...)
2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-10-11 9:08 ` pbhagavatula
2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula
` (4 more replies)
4 siblings, 5 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw)
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix port group mask generation in altivec: vec_any_eq returns
0 or 1, while port_groupx4 expects a comparison mask result.
Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
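For reference, a scalar sketch of the mask port_groupx4() consumes; the
AltiVec sequence below computes the same value with vec_cmpeq() and
vec_vbpermq() (the helper name here is illustrative, not part of the
patch):

	/* Bit i of v is set when destination port i equals port i + 1;
	 * gptbl[] is indexed by this per-lane mask, which is why the
	 * scalar 0/1 result of vec_any_eq() was wrong. */
	static inline int32_t
	port_mask_scalar(const uint16_t *dp1, const uint16_t *dp2)
	{
		int32_t v = 0;
		int i;

		for (i = 0; i < FWDSTEP; i++)
			v |= (int32_t)(dp1[i] == dp2[i]) << i;
		return v;
	}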
---
v4 Changes:
- Fix missing `rte_free`.
v3 Changes:
- Optimize port mask generation on PPC.
- Fix aarch32 compilation.
v2 Changes:
- Fix PPC, RISC-V, aarch32 compilation.
examples/common/altivec/port_group.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..1c05bc025a 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
uint16_t u16[FWDSTEP + 1];
uint64_t u64;
} *pnum = (void *)pn;
-
+ __vector unsigned long long result;
+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080,
+ 0x80808080, 0x80808080};
int32_t v;
- v = vec_any_eq(dp1, dp2);
-
+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
+ dp1 = vec_mergeh(dp1, dp1);
+ result = (__vector unsigned long long)vec_vbpermq(
+ (__vector unsigned char)dp1, (__vector unsigned char)perm_mask);
+ v = result[1];
/* update last port counter. */
lp[0] += gptbl[v].lpv;
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v4 2/5] examples/l3fwd: split processing and send stages
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
@ 2022-10-11 9:08 ` pbhagavatula
2022-10-11 9:08 ` [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
` (3 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Split the packet processing and packet send stages, as the send
stage is not common to poll and event modes.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
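The split hinges on the processing routines taking a constant do_step3
flag, so each call site compiles to a specialized body; a condensed
sketch of the two call sites:

	/* poll mode: lookup only, header rewrite stays inside
	 * send_packets_multi() */
	l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid,
				 qconf, 0);

	/* event mode (used by later patches): lookup plus in-place
	 * header rewrite via processx4_step3()/process_packet() */
	l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid,
				 qconf, 1);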
---
examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return portid;
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t *dst_port, uint16_t portid,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i, j, pos;
- uint16_t dst_port[MAX_PKT_BURST];
/*
* Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
dst_port[j + i] = em_get_dst_port(qconf,
pkts_burst[j + i], portid);
}
+
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
}
- for (; j < nb_rx; j++)
+ for (; j < nb_rx; j++) {
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &pkts_burst[j]->port);
+ }
+}
- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
/*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+ for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *) + 1);
- }
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
}
continue;
}
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
ipv4_flag[j / FWDSTEP],
portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
}
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i = 0, j = 0;
- uint16_t dst_port[MAX_PKT_BURST];
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
void *));
}
-
for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
for (i = 0; i < FWDSTEP; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
&pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
}
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
&dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
j += FWDSTEP;
}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
void *));
j++;
}
-
j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
}
}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step2(qconf, dip[j / FWDSTEP],
ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-10-11 9:08 ` pbhagavatula
2022-10-11 9:08 ` [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
` (2 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the LPM vector path to process event vectors.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
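All of the process_dst_port() variants added below implement one
contract: return the common destination when every element agrees,
else BAD_PORT. A small usage sketch with hypothetical values:

	uint16_t same[4]  = {1, 1, 1, 1};
	uint16_t mixed[4] = {1, 2, 1, 1};

	process_dst_port(same, 4);	/* == 1: vec->attr_valid can be kept */
	process_dst_port(mixed, 4);	/* == BAD_PORT: fall back to per-mbuf
					 * port/txq assignment */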
---
examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++--------
examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
5 files changed, 214 insertions(+), 16 deletions(-)
diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp1;
+ __vector unsigned short dp;
+
+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ int i;
+
+ for (i = 0; i < nb_elem; i++) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ }
+
+ return dst_ports[0];
+}
+#endif
+
static inline void
event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
{
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 22d7f61a42..5172979c72 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_port_list);
}
int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..bf365341fb 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0;
+
+#if defined(RTE_ARCH_ARM64)
+ uint16_t res;
+
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vminvq_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vminv_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+#endif
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-10-11 9:08 ` [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-10-11 9:08 ` pbhagavatula
2022-10-11 9:08 ` [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix stack overflow when the event vector size is greater than
MAX_BURST_SIZE.
Add the missing MAC swap and RFC1812 stages.
Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
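The per-lcore scratch added below replaces the on-stack MAX_PKT_BURST
arrays with a single allocation scaled by evt_rsrc->vector_size; a
sketch of the intended carve-up (this sketch assumes a per-element
IPv6 stride of RTE_FIB6_IPV6_ADDR_SIZE * i bytes):

	/* One rte_zmalloc() split into vector_size-sized arrays:
	 * ipv4_arr (u32) | type_arr (u8) | hopsv4 (u64) | hopsv6 (u64) |
	 * hops (u16) | ipv6_arr (u8 *) | ipv6 address bytes */
	ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
	for (i = 0; i < evt_rsrc->vector_size; i++)
		ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE * i];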
---
examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++-----------
1 file changed, 91 insertions(+), 39 deletions(-)
diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index b82e0c0354..407e9def71 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf,
*/
#if !defined FIB_SEND_MULTI
static inline void
-fib_send_single(int nb_tx, struct lcore_conf *qconf,
- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
{
- int32_t j;
struct rte_ether_hdr *eth_hdr;
- for (j = 0; j < nb_tx; j++) {
- /* Run rfc1812 if packet is ipv4 and checks enabled. */
+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
#if defined DO_RFC_1812_CHECKS
- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
- pkts_burst[j], struct rte_ether_hdr *) + 1),
- &hops[j], pkts_burst[j]->packet_type);
+ rfc1812_process(
+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
+ pkt, struct rte_ether_hdr *) +
+ 1),
+ hop, pkt->packet_type);
#endif
- /* Set MAC addresses. */
- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *);
- *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[hops[j]];
- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
- &eth_hdr->src_addr);
+ /* Set MAC addresses. */
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+ *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[*hop];
+ rte_ether_addr_copy(&ports_eth_addr[*hop], &eth_hdr->src_addr);
+}
+static inline void
+fib_send_single(int nb_tx, struct lcore_conf *qconf,
+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+{
+ int32_t j;
+
+ for (j = 0; j < nb_tx; j++) {
+ process_packet(pkts_burst[j], &hops[j]);
+ if (hops[j] == BAD_PORT) {
+ rte_pktmbuf_free(pkts_burst[j]);
+ continue;
+ }
/* Send single packet. */
send_single_packet(qconf, pkts_burst[j], hops[j]);
}
@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
uint32_t ipv4_arr[MAX_PKT_BURST];
uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
- uint16_t nh;
+ uint16_t nh, hops[MAX_PKT_BURST];
uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- events[i].mbuf->port = nh;
+ hops[i] = nh != FIB_DEFAULT_HOP ?
+ nh :
+ events[i].mbuf->port;
+ process_packet(events[i].mbuf, &hops[i]);
+ events[i].mbuf->port = hops[i] != BAD_PORT ?
+ hops[i] :
+ events[i].mbuf->port;
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+ uint32_t *ipv4_arr, uint16_t *hops)
{
- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
uint32_t ipv4_arr_assem, ipv6_arr_assem;
struct rte_mbuf **mbufs = vec->mbufs;
- uint32_t ipv4_arr[MAX_PKT_BURST];
- uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
struct lcore_conf *lconf;
uint16_t nh;
@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
/* Lookup IPv6 hops if IPv6 packets are present. */
if (ipv6_cnt > 0)
- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
- hopsv6, ipv6_cnt);
-
- if (vec->attr_valid) {
- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
- if (nh != FIB_DEFAULT_HOP)
- vec->port = nh;
- else
- vec->attr_valid = 0;
- }
+ rte_fib6_lookup_bulk(
+ lconf->ipv6_lookup_struct,
+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+ ipv6_cnt);
/* Assign ports looked up in fib depending on IPv4 or IPv6 */
for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- mbufs[i]->port = nh;
- event_vector_attr_validate(vec, mbufs[i]);
+ hops[i] = nh;
+ else
+ hops[i] = vec->attr_valid ? vec->port :
+ vec->mbufs[i]->port;
}
+
+#if defined FIB_SEND_MULTI
+ uint16_t k;
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
}
static __rte_always_inline void
@@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
const uint8_t event_d_id = evt_rsrc->event_d_id;
const uint16_t deq_len = evt_rsrc->deq_depth;
struct rte_event events[MAX_PKT_BURST];
+ uint8_t *type_arr, **ipv6_arr, *ptr;
int nb_enq = 0, nb_deq = 0, i;
-
- if (event_p_id < 0)
+ uint64_t *hopsv4, *hopsv6;
+ uint32_t *ipv4_arr;
+ uint16_t *hops;
+ uintptr_t mem;
+
+ mem = (uintptr_t)rte_zmalloc(
+ "vector_fib",
+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+ evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (mem == 0)
return;
+ ipv4_arr = (uint32_t *)mem;
+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+ for (i = 0; i < evt_rsrc->vector_size; i++)
+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i];
+
+ if (event_p_id < 0) {
+ rte_free(mem);
+ return;
+ }
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__,
rte_lcore_id());
@@ -519,10 +571,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- fib_process_event_vector(events[i].vec);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ fib_process_event_vector(events[i].vec, type_arr,
+ ipv6_arr, hopsv4, hopsv6,
+ ipv4_arr, hops);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(mem);
}
int __rte_noinline
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (2 preceding siblings ...)
2022-10-11 9:08 ` [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-10-11 9:08 ` pbhagavatula
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the EM vector path to process event vectors.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
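With a shared dst_port scratch, the vector handler batches the whole
vector when the event attribute is valid and falls back to per-mbuf
lookups otherwise; a condensed sketch of the dispatch added below:

	if (vec->attr_valid)	/* one input port for the whole vector */
		l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
					 vec->port, qconf, 1);
	else			/* per-mbuf input ports */
		for (i = 0; i < vec->nb_elem; i++)
			l3fwd_em_process_packets(1, &vec->mbufs[i],
						 &dst_port[i],
						 vec->mbufs[i]->port, qconf, 1);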
---
examples/l3fwd/l3fwd_em.c | 13 +++--
examples/l3fwd/l3fwd_em.h | 29 +++++------
examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
examples/l3fwd/l3fwd_event.h | 21 --------
5 files changed, 48 insertions(+), 112 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index a203dc9e46..35de31157e 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
unsigned int lcore_id;
+ uint16_t *dst_ports;
if (event_p_id < 0)
return;
+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_ports == NULL)
+ return;
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
@@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
}
#if defined RTE_ARCH_X86 || defined __ARM_NEON
- l3fwd_em_process_event_vector(events[i].vec, lconf);
+ l3fwd_em_process_event_vector(events[i].vec, lconf,
+ dst_ports);
#else
l3fwd_em_no_opt_process_event_vector(events[i].vec,
- lconf);
+ lconf, dst_ports);
#endif
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_ports);
}
int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
index fe2ee59f6a..7d051fc076 100644
--- a/examples/l3fwd/l3fwd_em.h
+++ b/examples/l3fwd/l3fwd_em.h
@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
}
}
-static __rte_always_inline void
+static __rte_always_inline uint16_t
l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
{
struct rte_ether_hdr *eth_hdr;
@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
else
m->port = BAD_PORT;
+
+ return m->port;
}
/*
@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
static inline void
l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf,
+ uint16_t *dst_ports)
{
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i;
@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
- /* Process first packet to init vector attributes */
- l3fwd_em_simple_process(mbufs[0], qconf);
- if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
- }
-
/*
* Prefetch and forward already prefetched packets.
*/
- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
rte_prefetch0(
rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *));
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
}
/* Forward remaining prefetched packets */
- for (; i < vec->nb_elem; i++) {
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ for (; i < vec->nb_elem; i++)
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_port)
{
- struct rte_mbuf **mbufs = vec->mbufs;
- uint16_t dst_port[MAX_PKT_BURST];
- int32_t i, j, n, pos;
-
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
- rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+ uint16_t i;
if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
- uint32_t pkt_type =
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
- uint32_t l3_type, tcp_or_udp;
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
- pkt_type &= mbufs[j + i]->packet_type;
-
- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
- i++, pos++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
- struct rte_ether_hdr *) +
- 1);
- }
-
- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else {
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port =
- em_get_dst_port(qconf, mbufs[j + i],
- mbufs[j + i]->port);
- process_packet(mbufs[j + i],
- &mbufs[j + i]->port);
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- continue;
- }
- processx4_step3(&mbufs[j], &dst_port[j]);
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port = dst_port[j + i];
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- }
-
- for (; j < vec->nb_elem; j++) {
- mbufs[j]->port =
- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
- process_packet(mbufs[j], &mbufs[j]->port);
- event_vector_attr_validate(vec, mbufs[j]);
- }
+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+ vec->port, qconf, 1);
+ else
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_em_process_packets(1, &vec->mbufs[i],
+ &dst_port[i],
+ vec->mbufs[i]->port, qconf, 1);
+
+ process_event_vector(vec, dst_port);
}
#endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,
for (i = 1, j = 0; j < nb_rx; i++, j++) {
struct rte_mbuf *mbuf = events[j]->mbuf;
+ uint16_t port;
if (i < nb_rx) {
rte_prefetch0(rte_pktmbuf_mtod(
events[i]->mbuf,
struct rte_ether_hdr *) + 1);
}
+ port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
}
}
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_ports)
{
+ const uint8_t attr_valid = vec->attr_valid;
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
- if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
if (j < vec->nb_elem)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
struct rte_ether_hdr *) +
1);
- mbufs[i]->port =
- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
- process_packet(mbufs[i], &mbufs[i]->port);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
}
+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != j; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &dst_ports[i]);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 3fe38aada0..e21817c36b 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
}
#endif
-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
- /* l3fwd application only changes mbuf port while processing */
- if (vec->attr_valid && (vec->port != mbuf->port))
- vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
- if (vec->attr_valid) {
- vec->queue = txq;
- } else {
- int i;
-
- for (i = 0; i < vec->nb_elem; i++)
- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
- }
-}
-
static inline uint16_t
filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
uint16_t nb_pkts)
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v5 1/5] examples/l3fwd: fix port group mask generation
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (3 preceding siblings ...)
2022-10-11 9:08 ` [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-10-11 10:12 ` pbhagavatula
2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula
` (5 more replies)
4 siblings, 6 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw)
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix port group mask generation in altivec: vec_any_eq returns
0 or 1, while port_groupx4 expects a comparison mask result.
Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
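A worked example of the expected mask, assuming destination ports
{1, 1, 2, 2, 3} in pn[0..FWDSTEP]:

	/* dp1 = {pn[0], pn[1], pn[2], pn[3]} = {1, 1, 2, 2}
	 * dp2 = {pn[1], pn[2], pn[3], pn[4]} = {1, 2, 2, 3}
	 * lane-wise equality   -> {1, 0, 1, 0}
	 * expected mask v      -> 0b0101 = 5
	 * vec_any_eq(dp1, dp2) -> 1 (some lane equal), which indexes
	 * the wrong gptbl[] entry. */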
---
v5 Changes:
- Fix compilation errors.
v4 Changes:
- Fix missing `rte_free`.
v3 Changes:
- Optimize port mask generation on PPC.
- Fix aarch32 compilation.
v2 Changes:
- Fix PPC, RISC-V, aarch32 compilation.
examples/common/altivec/port_group.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..1c05bc025a 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
uint16_t u16[FWDSTEP + 1];
uint64_t u64;
} *pnum = (void *)pn;
-
+ __vector unsigned long long result;
+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080,
+ 0x80808080, 0x80808080};
int32_t v;
- v = vec_any_eq(dp1, dp2);
-
+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
+ dp1 = vec_mergeh(dp1, dp1);
+ result = (__vector unsigned long long)vec_vbpermq(
+ (__vector unsigned char)dp1, (__vector unsigned char)perm_mask);
+ v = result[1];
/* update last port counter. */
lp[0] += gptbl[v].lpv;
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v5 2/5] examples/l3fwd: split processing and send stages
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
@ 2022-10-11 10:12 ` pbhagavatula
2022-10-17 12:06 ` [EXT] " Shijith Thotton
2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
` (4 subsequent siblings)
5 siblings, 1 reply; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Split the packet processing and packet send stages, as the send
stage is not common to poll and event modes.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
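The former entry point survives as a thin poll-mode wrapper, so the
poll path keeps its behavior; sketched from the hunk below:

	static inline void
	l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
			      uint16_t portid, struct lcore_conf *qconf)
	{
		uint16_t dst_port[MAX_PKT_BURST];

		/* do_step3 == 0: header rewrite still happens inside
		 * send_packets_multi() */
		l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port,
					 portid, qconf, 0);
		send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
	}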
---
examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return portid;
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t *dst_port, uint16_t portid,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i, j, pos;
- uint16_t dst_port[MAX_PKT_BURST];
/*
* Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
dst_port[j + i] = em_get_dst_port(qconf,
pkts_burst[j + i], portid);
}
+
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
}
- for (; j < nb_rx; j++)
+ for (; j < nb_rx; j++) {
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
+ }
+}
- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
/*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+ for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *) + 1);
- }
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
}
continue;
}
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
ipv4_flag[j / FWDSTEP],
portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
}
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i = 0, j = 0;
- uint16_t dst_port[MAX_PKT_BURST];
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
void *));
}
-
for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
for (i = 0; i < FWDSTEP; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
&pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
}
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
&dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
j += FWDSTEP;
}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
void *));
j++;
}
-
j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
}
}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step2(qconf, dip[j / FWDSTEP],
ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
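A sketch of how the split is meant to be consumed (the burst variables are
illustrative; function names follow the patch). Poll mode passes
do_step3 = 0 and leaves the header rewrite to send_packets_multi(), while
event mode has no common send stage and folds step 3 into processing:

	uint16_t dst_port[MAX_PKT_BURST];

	/* Poll mode: classify only; the send stage applies step 3 and TX. */
	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port,
				  qconf, 0);
	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);

	/* Event mode: rewrite MAC/TTL inline; TX happens via eventdev. */
	l3fwd_lpm_process_packets(vec->nb_elem, vec->mbufs, vec->port,
				  dst_port, qconf, 1);

Passing do_step3 as a compile-time constant lets the compiler specialize
each call site and drop the dead branches.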
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-10-11 10:12 ` pbhagavatula
2022-10-17 12:06 ` [EXT] " Shijith Thotton
2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
` (3 subsequent siblings)
5 siblings, 1 reply; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw)
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the LPM vector path to process event vectors.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++--------
examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
5 files changed, 214 insertions(+), 16 deletions(-)
diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp1;
+ __vector unsigned short dp;
+
+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ int i;
+
+ for (i = 0; i < nb_elem; i++) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ }
+
+ return dst_ports[0];
+}
+#endif
+
static inline void
event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
{
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 22d7f61a42..5172979c72 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_port_list);
}
int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..bf365341fb 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0;
+
+#if defined(RTE_ARCH_ARM64)
+ uint16_t res;
+
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vminvq_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vminv_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+#endif
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */
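All three vector variants above implement the same contract: return the
common destination port if every entry agrees, BAD_PORT otherwise. A
hypothetical usage sketch (values illustrative):

	uint16_t ports[8] = {3, 3, 3, 3, 3, 3, 3, 3};
	uint16_t p;

	p = process_dst_port(ports, 8);	/* == 3: vector keeps attr_valid */

	ports[5] = 7;
	p = process_dst_port(ports, 8);	/* == BAD_PORT: caller falls back
					 * to per-mbuf port/txq fields */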
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-10-11 10:12 ` pbhagavatula
2022-10-17 12:06 ` [EXT] " Shijith Thotton
2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
` (2 subsequent siblings)
5 siblings, 1 reply; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix stack overflow when the event vector size is greater than
MAX_PKT_BURST.
Add the missing MAC swap and RFC1812 stages.
Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++-----------
1 file changed, 91 insertions(+), 39 deletions(-)
diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index b82e0c0354..edc0dd69b9 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf,
*/
#if !defined FIB_SEND_MULTI
static inline void
-fib_send_single(int nb_tx, struct lcore_conf *qconf,
- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
{
- int32_t j;
struct rte_ether_hdr *eth_hdr;
- for (j = 0; j < nb_tx; j++) {
- /* Run rfc1812 if packet is ipv4 and checks enabled. */
+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
#if defined DO_RFC_1812_CHECKS
- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
- pkts_burst[j], struct rte_ether_hdr *) + 1),
- &hops[j], pkts_burst[j]->packet_type);
+ rfc1812_process(
+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
+ pkt, struct rte_ether_hdr *) +
+ 1),
+ hop, pkt->packet_type);
#endif
- /* Set MAC addresses. */
- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *);
- *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[hops[j]];
- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
- &eth_hdr->src_addr);
+ /* Set MAC addresses. */
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+ *(uint64_t *)&eth_hdr->dst_addr = dest_eth_addr[*hop];
+ rte_ether_addr_copy(&ports_eth_addr[*hop], &eth_hdr->src_addr);
+}
+static inline void
+fib_send_single(int nb_tx, struct lcore_conf *qconf,
+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+{
+ int32_t j;
+
+ for (j = 0; j < nb_tx; j++) {
+ process_packet(pkts_burst[j], &hops[j]);
+ if (hops[j] == BAD_PORT) {
+ rte_pktmbuf_free(pkts_burst[j]);
+ continue;
+ }
/* Send single packet. */
send_single_packet(qconf, pkts_burst[j], hops[j]);
}
@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
uint32_t ipv4_arr[MAX_PKT_BURST];
uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
- uint16_t nh;
+ uint16_t nh, hops[MAX_PKT_BURST];
uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- events[i].mbuf->port = nh;
+ hops[i] = nh;
+ else
+ hops[i] = events[i].mbuf->port;
+ process_packet(events[i].mbuf, &hops[i]);
+ events[i].mbuf->port = hops[i] != BAD_PORT ?
+ hops[i] :
+ events[i].mbuf->port;
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+ uint32_t *ipv4_arr, uint16_t *hops)
{
- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
uint32_t ipv4_arr_assem, ipv6_arr_assem;
struct rte_mbuf **mbufs = vec->mbufs;
- uint32_t ipv4_arr[MAX_PKT_BURST];
- uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
struct lcore_conf *lconf;
uint16_t nh;
@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
/* Lookup IPv6 hops if IPv6 packets are present. */
if (ipv6_cnt > 0)
- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
- hopsv6, ipv6_cnt);
-
- if (vec->attr_valid) {
- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
- if (nh != FIB_DEFAULT_HOP)
- vec->port = nh;
- else
- vec->attr_valid = 0;
- }
+ rte_fib6_lookup_bulk(
+ lconf->ipv6_lookup_struct,
+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+ ipv6_cnt);
/* Assign ports looked up in fib depending on IPv4 or IPv6 */
for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- mbufs[i]->port = nh;
- event_vector_attr_validate(vec, mbufs[i]);
+ hops[i] = nh;
+ else
+ hops[i] = vec->attr_valid ? vec->port :
+ vec->mbufs[i]->port;
}
+
+#if defined FIB_SEND_MULTI
+ uint16_t k;
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
}
static __rte_always_inline void
@@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
const uint8_t event_d_id = evt_rsrc->event_d_id;
const uint16_t deq_len = evt_rsrc->deq_depth;
struct rte_event events[MAX_PKT_BURST];
+ uint8_t *type_arr, **ipv6_arr, *ptr;
int nb_enq = 0, nb_deq = 0, i;
-
- if (event_p_id < 0)
+ uint64_t *hopsv4, *hopsv6;
+ uint32_t *ipv4_arr;
+ uint16_t *hops;
+ uintptr_t mem;
+
+ mem = (uintptr_t)rte_zmalloc(
+ "vector_fib",
+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+ evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (mem == 0)
return;
+ ipv4_arr = (uint32_t *)mem;
+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+ for (i = 0; i < evt_rsrc->vector_size; i++)
+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE * i];
+
+ if (event_p_id < 0) {
+ rte_free((void *)mem);
+ return;
+ }
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__,
rte_lcore_id());
@@ -519,10 +571,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- fib_process_event_vector(events[i].vec);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ fib_process_event_vector(events[i].vec, type_arr,
+ ipv6_arr, hopsv4, hopsv6,
+ ipv4_arr, hops);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free((void *)mem);
}
int __rte_noinline
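The hunk above replaces several on-stack MAX_PKT_BURST arrays with one heap
region sized by evt_rsrc->vector_size. A minimal sketch of the carving idea
(not the patch itself: a flat 2-D array stands in for the patch's
pointer-per-element ipv6_arr, and ordering the widest types first keeps
every carved pointer naturally aligned for any n; NULL check omitted):

	uint16_t n = evt_rsrc->vector_size;
	uintptr_t mem = (uintptr_t)rte_zmalloc("vector_fib",
		n * (2 * sizeof(uint64_t) + sizeof(uint32_t) +
		     sizeof(uint16_t) + sizeof(uint8_t) +
		     RTE_FIB6_IPV6_ADDR_SIZE),
		RTE_CACHE_LINE_SIZE);
	uint64_t *hopsv4 = (uint64_t *)mem;		/* n x u64 */
	uint64_t *hopsv6 = &hopsv4[n];			/* n x u64 */
	uint32_t *ipv4_arr = (uint32_t *)&hopsv6[n];	/* n x u32 */
	uint16_t *hops = (uint16_t *)&ipv4_arr[n];	/* n x u16 */
	uint8_t *type_arr = (uint8_t *)&hops[n];	/* n x u8 */
	uint8_t (*ip6)[RTE_FIB6_IPV6_ADDR_SIZE] =
		(uint8_t (*)[RTE_FIB6_IPV6_ADDR_SIZE])&type_arr[n];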
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (2 preceding siblings ...)
2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-10-11 10:12 ` pbhagavatula
2022-10-12 8:57 ` [EXT] " Shijith Thotton
2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
5 siblings, 1 reply; 41+ messages in thread
From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw)
To: jerinj; +Cc: dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use the EM vector path to process event vectors.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_em.c | 13 +++--
examples/l3fwd/l3fwd_em.h | 29 +++++------
examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
examples/l3fwd/l3fwd_event.h | 21 --------
5 files changed, 48 insertions(+), 112 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index a203dc9e46..35de31157e 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
unsigned int lcore_id;
+ uint16_t *dst_ports;
if (event_p_id < 0)
return;
+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_ports == NULL)
+ return;
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
@@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
}
#if defined RTE_ARCH_X86 || defined __ARM_NEON
- l3fwd_em_process_event_vector(events[i].vec, lconf);
+ l3fwd_em_process_event_vector(events[i].vec, lconf,
+ dst_ports);
#else
l3fwd_em_no_opt_process_event_vector(events[i].vec,
- lconf);
+ lconf, dst_ports);
#endif
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_ports);
}
int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
index fe2ee59f6a..7d051fc076 100644
--- a/examples/l3fwd/l3fwd_em.h
+++ b/examples/l3fwd/l3fwd_em.h
@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
}
}
-static __rte_always_inline void
+static __rte_always_inline uint16_t
l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
{
struct rte_ether_hdr *eth_hdr;
@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
else
m->port = BAD_PORT;
+
+ return m->port;
}
/*
@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
static inline void
l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf,
+ uint16_t *dst_ports)
{
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i;
@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
- /* Process first packet to init vector attributes */
- l3fwd_em_simple_process(mbufs[0], qconf);
- if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
- }
-
/*
* Prefetch and forward already prefetched packets.
*/
- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
rte_prefetch0(
rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *));
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
}
/* Forward remaining prefetched packets */
- for (; i < vec->nb_elem; i++) {
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ for (; i < vec->nb_elem; i++)
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_port)
{
- struct rte_mbuf **mbufs = vec->mbufs;
- uint16_t dst_port[MAX_PKT_BURST];
- int32_t i, j, n, pos;
-
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
- rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+ uint16_t i;
if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
- uint32_t pkt_type =
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
- uint32_t l3_type, tcp_or_udp;
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
- pkt_type &= mbufs[j + i]->packet_type;
-
- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
- i++, pos++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
- struct rte_ether_hdr *) +
- 1);
- }
-
- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else {
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port =
- em_get_dst_port(qconf, mbufs[j + i],
- mbufs[j + i]->port);
- process_packet(mbufs[j + i],
- &mbufs[j + i]->port);
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- continue;
- }
- processx4_step3(&mbufs[j], &dst_port[j]);
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port = dst_port[j + i];
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- }
-
- for (; j < vec->nb_elem; j++) {
- mbufs[j]->port =
- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
- process_packet(mbufs[j], &mbufs[j]->port);
- event_vector_attr_validate(vec, mbufs[j]);
- }
+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+ vec->port, qconf, 1);
+ else
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_em_process_packets(1, &vec->mbufs[i],
+ &dst_port[i],
+ vec->mbufs[i]->port, qconf, 1);
+
+ process_event_vector(vec, dst_port);
}
#endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,
for (i = 1, j = 0; j < nb_rx; i++, j++) {
struct rte_mbuf *mbuf = events[j]->mbuf;
+ uint16_t port;
if (i < nb_rx) {
rte_prefetch0(rte_pktmbuf_mtod(
events[i]->mbuf,
struct rte_ether_hdr *) + 1);
}
+ port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
}
}
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_ports)
{
+ const uint8_t attr_valid = vec->attr_valid;
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
- if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
if (j < vec->nb_elem)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
struct rte_ether_hdr *) +
1);
- mbufs[i]->port =
- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
- process_packet(mbufs[i], &mbufs[i]->port);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
}
+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != j; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &dst_ports[i]);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 3fe38aada0..e21817c36b 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
}
#endif
-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
- /* l3fwd application only changes mbuf port while processing */
- if (vec->attr_valid && (vec->port != mbuf->port))
- vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
- if (vec->attr_valid) {
- vec->queue = txq;
- } else {
- int i;
-
- for (i = 0; i < vec->nb_elem; i++)
- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
- }
-}
-
static inline uint16_t
filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
uint16_t nb_pkts)
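A hypothetical worker fragment showing how the EM vector path is driven end
to end; dst_ports is the per-worker scratch array allocated above, and
event_d_id/event_p_id/flags follow the surrounding loop:

	struct rte_event_vector *vec = events[i].vec;

	l3fwd_em_process_event_vector(vec, lconf, dst_ports);
	/* process_event_vector() has now either set vec->port/vec->queue
	 * (attr_valid) or written per-mbuf ports and txqs for the adapter,
	 * so the removed event_vector_txq_set() call is no longer needed. */
	if (flags & L3FWD_EVENT_TX_DIRECT)
		rte_event_eth_tx_adapter_enqueue(event_d_id, event_p_id,
						 &events[i], 1, 0);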
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector
2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-10-12 8:57 ` Shijith Thotton
0 siblings, 0 replies; 41+ messages in thread
From: Shijith Thotton @ 2022-10-12 8:57 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran
Cc: dev, Pavan Nikhilesh Bhagavatula
>From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
>Use em vector path to process event vector.
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>---
> examples/l3fwd/l3fwd_em.c | 13 +++--
> examples/l3fwd/l3fwd_em.h | 29 +++++------
> examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
> examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
> examples/l3fwd/l3fwd_event.h | 21 --------
> 5 files changed, 48 insertions(+), 112 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
>index a203dc9e46..35de31157e 100644
>--- a/examples/l3fwd/l3fwd_em.c
>+++ b/examples/l3fwd/l3fwd_em.c
>@@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> int i, nb_enq = 0, nb_deq = 0;
> struct lcore_conf *lconf;
> unsigned int lcore_id;
>+ uint16_t *dst_ports;
>
> if (event_p_id < 0)
> return;
>
>+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
>+ RTE_CACHE_LINE_SIZE);
>+ if (dst_ports == NULL)
>+ return;
> lcore_id = rte_lcore_id();
> lconf = &lcore_conf[lcore_id];
>
>@@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> }
>
> #if defined RTE_ARCH_X86 || defined __ARM_NEON
>- l3fwd_em_process_event_vector(events[i].vec, lconf);
>+ l3fwd_em_process_event_vector(events[i].vec, lconf,
>+ dst_ports);
> #else
> l3fwd_em_no_opt_process_event_vector(events[i].vec,
>- lconf);
>+ lconf, dst_ports);
> #endif
>- if (flags & L3FWD_EVENT_TX_DIRECT)
>- event_vector_txq_set(events[i].vec, 0);
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>@@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
>
> l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
> nb_deq, 1);
>+ rte_free(dst_ports);
> }
>
> int __rte_noinline
>diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
>index fe2ee59f6a..7d051fc076 100644
>--- a/examples/l3fwd/l3fwd_em.h
>+++ b/examples/l3fwd/l3fwd_em.h
>@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t
>portid,
> }
> }
>
>-static __rte_always_inline void
>+static __rte_always_inline uint16_t
> l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
> {
> struct rte_ether_hdr *eth_hdr;
>@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct
>lcore_conf *qconf)
> m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
> else
> m->port = BAD_PORT;
>+
>+ return m->port;
> }
>
> /*
>@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct
>rte_event **events,
>
> static inline void
> l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
>- struct lcore_conf *qconf)
>+ struct lcore_conf *qconf,
>+ uint16_t *dst_ports)
> {
> struct rte_mbuf **mbufs = vec->mbufs;
> int32_t i;
>@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct
>rte_event_vector *vec,
> for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
> rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
>
>- /* Process first packet to init vector attributes */
>- l3fwd_em_simple_process(mbufs[0], qconf);
>- if (vec->attr_valid) {
>- if (mbufs[0]->port != BAD_PORT)
>- vec->port = mbufs[0]->port;
>- else
>- vec->attr_valid = 0;
>- }
>-
> /*
> * Prefetch and forward already prefetched packets.
> */
>- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
>+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
> rte_prefetch0(
> rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void
>*));
>- l3fwd_em_simple_process(mbufs[i], qconf);
>- event_vector_attr_validate(vec, mbufs[i]);
>+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
> }
>
> /* Forward remaining prefetched packets */
>- for (; i < vec->nb_elem; i++) {
>- l3fwd_em_simple_process(mbufs[i], qconf);
>- event_vector_attr_validate(vec, mbufs[i]);
>- }
>+ for (; i < vec->nb_elem; i++)
>+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
>+
>+ process_event_vector(vec, dst_ports);
> }
>
> #endif /* __L3FWD_EM_H__ */
>diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
>index 12b997e477..2e11eefad7 100644
>--- a/examples/l3fwd/l3fwd_em_hlm.h
>+++ b/examples/l3fwd/l3fwd_em_hlm.h
>@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event
>**ev,
>
> static inline void
> l3fwd_em_process_event_vector(struct rte_event_vector *vec,
>- struct lcore_conf *qconf)
>+ struct lcore_conf *qconf, uint16_t *dst_port)
> {
>- struct rte_mbuf **mbufs = vec->mbufs;
>- uint16_t dst_port[MAX_PKT_BURST];
>- int32_t i, j, n, pos;
>-
>- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
>- rte_prefetch0(
>- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
>+ uint16_t i;
>
> if (vec->attr_valid)
>- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
>-
>- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
>- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
>- uint32_t pkt_type =
>- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP |
>RTE_PTYPE_L4_UDP;
>- uint32_t l3_type, tcp_or_udp;
>-
>- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
>- pkt_type &= mbufs[j + i]->packet_type;
>-
>- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
>- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP |
>RTE_PTYPE_L4_UDP);
>-
>- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
>- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
>- i++, pos++) {
>- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
>- struct rte_ether_hdr *) +
>- 1);
>- }
>-
>- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
>- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
>- &dst_port[j]);
>- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
>- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
>- &dst_port[j]);
>- } else {
>- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
>- mbufs[j + i]->port =
>- em_get_dst_port(qconf, mbufs[j + i],
>- mbufs[j + i]->port);
>- process_packet(mbufs[j + i],
>- &mbufs[j + i]->port);
>- event_vector_attr_validate(vec, mbufs[j + i]);
>- }
>- continue;
>- }
>- processx4_step3(&mbufs[j], &dst_port[j]);
>-
>- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
>- mbufs[j + i]->port = dst_port[j + i];
>- event_vector_attr_validate(vec, mbufs[j + i]);
>- }
>- }
>-
>- for (; j < vec->nb_elem; j++) {
>- mbufs[j]->port =
>- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
>- process_packet(mbufs[j], &mbufs[j]->port);
>- event_vector_attr_validate(vec, mbufs[j]);
>- }
>+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs,
>dst_port,
>+ vec->port, qconf, 1);
>+ else
>+ for (i = 0; i < vec->nb_elem; i++)
>+ l3fwd_em_process_packets(1, &vec->mbufs[i],
>+ &dst_port[i],
>+ vec->mbufs[i]->port, qconf, 1);
>+
>+ process_event_vector(vec, dst_port);
> }
>
> #endif /* __L3FWD_EM_HLM_H__ */
>diff --git a/examples/l3fwd/l3fwd_em_sequential.h
>b/examples/l3fwd/l3fwd_em_sequential.h
>index d2f75edb8a..067f23889a 100644
>--- a/examples/l3fwd/l3fwd_em_sequential.h
>+++ b/examples/l3fwd/l3fwd_em_sequential.h
>@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event
>**events,
>
> for (i = 1, j = 0; j < nb_rx; i++, j++) {
> struct rte_mbuf *mbuf = events[j]->mbuf;
>+ uint16_t port;
>
> if (i < nb_rx) {
> rte_prefetch0(rte_pktmbuf_mtod(
> events[i]->mbuf,
> struct rte_ether_hdr *) + 1);
> }
>+ port = mbuf->port;
> mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
> process_packet(mbuf, &mbuf->port);
>+ if (mbuf->port == BAD_PORT)
>+ mbuf->port = port;
> }
> }
>
> static inline void
> l3fwd_em_process_event_vector(struct rte_event_vector *vec,
>- struct lcore_conf *qconf)
>+ struct lcore_conf *qconf, uint16_t *dst_ports)
> {
>+ const uint8_t attr_valid = vec->attr_valid;
> struct rte_mbuf **mbufs = vec->mbufs;
> int32_t i, j;
>
> rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
>
>- if (vec->attr_valid)
>- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
>-
> for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
> if (j < vec->nb_elem)
> rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
> struct rte_ether_hdr *) +
> 1);
>- mbufs[i]->port =
>- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
>- process_packet(mbufs[i], &mbufs[i]->port);
>- event_vector_attr_validate(vec, mbufs[i]);
>+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
>+ attr_valid ? vec->port :
>+ mbufs[i]->port);
> }
>+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
>+
>+ for (i = 0; i != j; i += FWDSTEP)
>+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
>+ for (; i < vec->nb_elem; i++)
>+ process_packet(vec->mbufs[i], &dst_ports[i]);
>+
>+ process_event_vector(vec, dst_ports);
> }
>
> #endif /* __L3FWD_EM_SEQUENTIAL_H__ */
>diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
>index 3fe38aada0..e21817c36b 100644
>--- a/examples/l3fwd/l3fwd_event.h
>+++ b/examples/l3fwd/l3fwd_event.h
>@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t
>nb_elem)
> }
> #endif
>
>-static inline void
>-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf
>*mbuf)
>-{
>- /* l3fwd application only changes mbuf port while processing */
>- if (vec->attr_valid && (vec->port != mbuf->port))
>- vec->attr_valid = 0;
>-}
>-
>-static inline void
>-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
>-{
>- if (vec->attr_valid) {
>- vec->queue = txq;
>- } else {
>- int i;
>-
>- for (i = 0; i < vec->nb_elem; i++)
>- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
>- }
>-}
>-
> static inline uint16_t
> filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
> uint16_t nb_pkts)
>--
>2.25.1
For the series:
Acked-by: Shijith Thotton <sthotton@marvell.com>
^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (3 preceding siblings ...)
2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-10-17 12:05 ` Shijith Thotton
2022-10-20 16:15 ` Pavan Nikhilesh Bhagavatula
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
5 siblings, 1 reply; 41+ messages in thread
From: Shijith Thotton @ 2022-10-17 12:05 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran,
David Christensen
Cc: dev, Pavan Nikhilesh Bhagavatula, stable
>
>Fix port group mask generation in altivec, vec_any_eq returns
>0 or 1 while port_groupx4 expects comparison mask result.
>
>Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
>Cc: stable@dpdk.org
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
>---
> v5 Changes:
> - Fix compilation errors.
>
> v4 Changes:
> - Fix missing `rte_free`.
>
> v3 Changes:
> - PPC optimize port mask generation.
> - Fix aarch32 compilation.
>
> v2 Changes:
> - Fix PPC, RISC-V, aarch32 compilation.
>
> examples/common/altivec/port_group.h | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
>diff --git a/examples/common/altivec/port_group.h
>b/examples/common/altivec/port_group.h
>index 5e209b02fa..1c05bc025a 100644
>--- a/examples/common/altivec/port_group.h
>+++ b/examples/common/altivec/port_group.h
>@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
> uint16_t u16[FWDSTEP + 1];
> uint64_t u64;
> } *pnum = (void *)pn;
>-
>+ __vector unsigned long long result;
>+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080,
>+ 0x80808080, 0x80808080};
> int32_t v;
>
>- v = vec_any_eq(dp1, dp2);
>-
>+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
>+ dp1 = vec_mergeh(dp1, dp1);
>+ result = (__vector unsigned long long)vec_vbpermq(
>+ (__vector unsigned char)dp1, (__vector unsigned
>char)perm_mask);
>
>+ v = result[1];
> /* update last port counter. */
> lp[0] += gptbl[v].lpv;
>
>--
>2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v5 2/5] examples/l3fwd: split processing and send stages
2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-10-17 12:06 ` Shijith Thotton
0 siblings, 0 replies; 41+ messages in thread
From: Shijith Thotton @ 2022-10-17 12:06 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran,
David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh Bhagavatula
>
>Split packet processing from packet send stage, as send stage
>is not common for poll and event mode.
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
>---
> examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
> examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
> examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
> examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
> 4 files changed, 95 insertions(+), 29 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
>index e76f2760b0..12b997e477 100644
>--- a/examples/l3fwd/l3fwd_em_hlm.h
>+++ b/examples/l3fwd/l3fwd_em_hlm.h
>@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct
>rte_mbuf *pkt,
> return portid;
> }
>
>-/*
>- * Buffer optimized handling of packets, invoked
>- * from main_loop.
>- */
> static inline void
>-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>- uint16_t portid, struct lcore_conf *qconf)
>+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>+ uint16_t *dst_port, uint16_t portid,
>+ struct lcore_conf *qconf, const uint8_t do_step3)
> {
> int32_t i, j, pos;
>- uint16_t dst_port[MAX_PKT_BURST];
>
> /*
> * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
>@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf
>**pkts_burst,
> dst_port[j + i] = em_get_dst_port(qconf,
> pkts_burst[j + i], portid);
> }
>+
>+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i +=
>FWDSTEP)
>+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
> }
>
>- for (; j < nb_rx; j++)
>+ for (; j < nb_rx; j++) {
> dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &pkts_burst[j]->port);
>+ }
>+}
>
>- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
>+/*
>+ * Buffer optimized handling of packets, invoked
>+ * from main_loop.
>+ */
>+static inline void
>+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t
>portid,
>+ struct lcore_conf *qconf)
>+{
>+ uint16_t dst_port[MAX_PKT_BURST];
>
>+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf,
>0);
>+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
> }
>
> /*
>@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event
>**ev,
> */
> int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
>
>- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
>+ for (j = 0; j < nb_rx; j++)
> pkts_burst[j] = ev[j]->mbuf;
>- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
>- struct rte_ether_hdr *) + 1);
>- }
>
> for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
>
>@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event
>**ev,
> }
> continue;
> }
>- processx4_step3(&pkts_burst[j], &dst_port[j]);
>+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
>+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
>
> for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
> pkts_burst[j + i]->port = dst_port[j + i];
>diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h
>b/examples/l3fwd/l3fwd_lpm_altivec.h
>index 0c6852a7bb..adb82f1478 100644
>--- a/examples/l3fwd/l3fwd_lpm_altivec.h
>+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
>@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
> * from main_loop.
> */
> static inline void
>-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>- uint8_t portid, struct lcore_conf *qconf)
>+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>+ uint8_t portid, uint16_t *dst_port,
>+ struct lcore_conf *qconf, const uint8_t do_step3)
> {
> int32_t j;
>- uint16_t dst_port[MAX_PKT_BURST];
> __vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
> uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
> const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
>@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf
>**pkts_burst,
> ipv4_flag[j / FWDSTEP],
> portid, &pkts_burst[j], &dst_port[j]);
>
>+ if (do_step3)
>+ for (j = 0; j != k; j += FWDSTEP)
>+ processx4_step3(&pkts_burst[j], &dst_port[j]);
>+
> /* Classify last up to 3 packets one by one */
> switch (nb_rx % FWDSTEP) {
> case 3:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fall-through */
> case 2:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fall-through */
> case 1:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fall-through */
> }
>+}
>+
>+static inline void
>+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t
>portid,
>+ struct lcore_conf *qconf)
>+{
>+ uint16_t dst_port[MAX_PKT_BURST];
>
>+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
>+ 0);
> send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
> }
>
>diff --git a/examples/l3fwd/l3fwd_lpm_neon.h
>b/examples/l3fwd/l3fwd_lpm_neon.h
>index 78ee83b76c..2a68c4c15e 100644
>--- a/examples/l3fwd/l3fwd_lpm_neon.h
>+++ b/examples/l3fwd/l3fwd_lpm_neon.h
>@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
> }
> }
>
>-/*
>- * Buffer optimized handling of packets, invoked
>- * from main_loop.
>- */
> static inline void
>-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>- uint16_t portid, struct lcore_conf *qconf)
>+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>+ uint16_t portid, uint16_t *dst_port,
>+ struct lcore_conf *qconf, const uint8_t do_step3)
> {
> int32_t i = 0, j = 0;
>- uint16_t dst_port[MAX_PKT_BURST];
> int32x4_t dip;
> uint32_t ipv4_flag;
> const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
>@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf
>**pkts_burst,
> rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
> void *));
> }
>-
> for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
> for (i = 0; i < FWDSTEP; i++) {
> rte_prefetch0(rte_pktmbuf_mtod(
>@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf
>**pkts_burst,
> processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
> processx4_step2(qconf, dip, ipv4_flag, portid,
> &pkts_burst[j], &dst_port[j]);
>+ if (do_step3)
>+ processx4_step3(&pkts_burst[j], &dst_port[j]);
> }
>
> processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
> processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
> &dst_port[j]);
>+ if (do_step3)
>+ processx4_step3(&pkts_burst[j], &dst_port[j]);
>
> j += FWDSTEP;
> }
>@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf
>**pkts_burst,
> void *));
> j++;
> }
>-
> j -= m;
> /* Classify last up to 3 packets one by one */
> switch (m) {
> case 3:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
> portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fallthrough */
> case 2:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
> portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fallthrough */
> case 1:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
> portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> }
> }
>+}
>+
>+/*
>+ * Buffer optimized handling of packets, invoked
>+ * from main_loop.
>+ */
>+static inline void
>+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t
>portid,
>+ struct lcore_conf *qconf)
>+{
>+ uint16_t dst_port[MAX_PKT_BURST];
>
>+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
>+ 0);
> send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
> }
>
>diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
>index 3f637a23d1..db15030320 100644
>--- a/examples/l3fwd/l3fwd_lpm_sse.h
>+++ b/examples/l3fwd/l3fwd_lpm_sse.h
>@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
> * from main_loop.
> */
> static inline void
>-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>- uint16_t portid, struct lcore_conf *qconf)
>+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
>+ uint16_t portid, uint16_t *dst_port,
>+ struct lcore_conf *qconf, const uint8_t do_step3)
> {
> int32_t j;
>- uint16_t dst_port[MAX_PKT_BURST];
> __m128i dip[MAX_PKT_BURST / FWDSTEP];
> uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
> const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
>@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf
>**pkts_burst,
> processx4_step2(qconf, dip[j / FWDSTEP],
> ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j],
>&dst_port[j]);
>
>+ if (do_step3)
>+ for (j = 0; j != k; j += FWDSTEP)
>+ processx4_step3(&pkts_burst[j], &dst_port[j]);
>+
> /* Classify last up to 3 packets one by one */
> switch (nb_rx % FWDSTEP) {
> case 3:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fall-through */
> case 2:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> /* fall-through */
> case 1:
> dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
>+ if (do_step3)
>+ process_packet(pkts_burst[j], &dst_port[j]);
> j++;
> }
>+}
>+
>+static inline void
>+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
>+ struct lcore_conf *qconf)
>+{
>+ uint16_t dst_port[MAX_PKT_BURST];
>
>+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
>+ 0);
> send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
> }
>
>--
>2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
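
The refactor above leans on `do_step3` being a compile-time constant at every call site: once the helper is inlined, the `if (do_step3)` branches fold away, so the poll-mode path pays nothing for carrying the event-mode rewrite stage. A minimal sketch of the pattern, with hypothetical names standing in for the l3fwd lookup and header-rewrite helpers (not the real l3fwd functions):

    #include <stdint.h>

    struct pkt { uint16_t port; };

    /* Stand-ins for the LPM lookup (steps 1-2) and the MAC swap /
     * rfc1812 rewrite (step 3); hypothetical, for illustration only. */
    static uint16_t classify(struct pkt *p) { return p->port; }
    static void rewrite(struct pkt *p, uint16_t *dst) { p->port = *dst; }

    /* do_step3 is constant at each call site, so after inlining the
     * branch disappears and each caller gets a specialized loop. */
    static inline void
    process_burst(struct pkt **pkts, uint16_t *dst, int n,
                  const uint8_t do_step3)
    {
            for (int i = 0; i < n; i++) {
                    dst[i] = classify(pkts[i]);        /* always: classify */
                    if (do_step3)
                            rewrite(pkts[i], &dst[i]); /* event mode only */
            }
    }

Poll mode would call `process_burst(pkts, dst, n, 0)` and leave the rewrite to the send stage; event mode passes 1 so packets leave the function fully formed.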
* RE: [EXT] [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector
2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-10-17 12:06 ` Shijith Thotton
0 siblings, 0 replies; 41+ messages in thread
From: Shijith Thotton @ 2022-10-17 12:06 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran,
David Christensen, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev
Cc: dev, Pavan Nikhilesh Bhagavatula
>
>Use lpm vector path to process event vector.
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
>---
> examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
> examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
> examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++--------
> examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++
> examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
> 5 files changed, 214 insertions(+), 16 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
>index 87018f5dbe..e45e138e59 100644
>--- a/examples/l3fwd/l3fwd_altivec.h
>+++ b/examples/l3fwd/l3fwd_altivec.h
>@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
> }
> }
>
>+static __rte_always_inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ uint16_t i = 0, res;
>+
>+ while (nb_elem > 7) {
>+ __vector unsigned short dp1;
>+ __vector unsigned short dp;
>+
>+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
>+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
>+ res = vec_all_eq(dp1, dp);
>+ if (!res)
>+ return BAD_PORT;
>+
>+ nb_elem -= 8;
>+ i += 8;
>+ }
>+
>+ while (nb_elem) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ nb_elem--;
>+ i++;
>+ }
>+
>+ return dst_ports[0];
>+}
>+
> #endif /* _L3FWD_ALTIVEC_H_ */
>diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
>index b93841a16f..3fe38aada0 100644
>--- a/examples/l3fwd/l3fwd_event.h
>+++ b/examples/l3fwd/l3fwd_event.h
>@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
> uint64_t vector_tmo_ns;
> };
>
>+#if defined(RTE_ARCH_X86)
>+#include "l3fwd_sse.h"
>+#elif defined __ARM_NEON
>+#include "l3fwd_neon.h"
>+#elif defined(RTE_ARCH_PPC_64)
>+#include "l3fwd_altivec.h"
>+#else
>+static inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ int i;
>+
>+ for (i = 0; i < nb_elem; i++) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ }
>+
>+ return dst_ports[0];
>+}
>+#endif
>+
> static inline void
> event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
> {
>@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
> }
> }
>
>+static inline uint16_t
>+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
>+ uint16_t nb_pkts)
>+{
>+ uint16_t *des_pos, free = 0;
>+ struct rte_mbuf **pos;
>+ int i;
>+
>+ /* Filter out and free bad packets */
>+ for (i = 0; i < nb_pkts; i++) {
>+ if (dst_port[i] == BAD_PORT) {
>+ rte_pktmbuf_free(mbufs[i]);
>+ if (!free) {
>+ pos = &mbufs[i];
>+ des_pos = &dst_port[i];
>+ }
>+ free++;
>+ continue;
>+ }
>+
>+ if (free) {
>+ *pos = mbufs[i];
>+ pos++;
>+ *des_pos = dst_port[i];
>+ des_pos++;
>+ }
>+ }
>
>+ return nb_pkts - free;
>+}
>+
>+static inline void
>+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
>+{
>+ uint16_t port, i;
>+
>+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
>+ /* Verify destination array */
>+ port = process_dst_port(dst_port, vec->nb_elem);
>+ if (port == BAD_PORT) {
>+ vec->attr_valid = 0;
>+ for (i = 0; i < vec->nb_elem; i++) {
>+ vec->mbufs[i]->port = dst_port[i];
>+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
>+ }
>+ } else {
>+ vec->attr_valid = 1;
>+ vec->port = port;
>+ vec->queue = 0;
>+ }
>+}
>
> struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
> void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
>diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
>index 22d7f61a42..5172979c72 100644
>--- a/examples/l3fwd/l3fwd_lpm.c
>+++ b/examples/l3fwd/l3fwd_lpm.c
>@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
> }
>
> static __rte_always_inline void
>-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
>+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
>+ uint16_t *dst_port)
> {
> struct rte_mbuf **mbufs = vec->mbufs;
> int i;
>
>- /* Process first packet to init vector attributes */
>- lpm_process_event_pkt(lconf, mbufs[0]);
>+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
> if (vec->attr_valid) {
>- if (mbufs[0]->port != BAD_PORT)
>- vec->port = mbufs[0]->port;
>- else
>- vec->attr_valid = 0;
>+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
>+ dst_port, lconf, 1);
>+ } else {
>+ for (i = 0; i < vec->nb_elem; i++)
>+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
>+ &dst_port[i], lconf, 1);
> }
>+#else
>+ for (i = 0; i < vec->nb_elem; i++)
>+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
>+#endif
>
>- for (i = 1; i < vec->nb_elem; i++) {
>- lpm_process_event_pkt(lconf, mbufs[i]);
>- event_vector_attr_validate(vec, mbufs[i]);
>- }
>+ process_event_vector(vec, dst_port);
> }
>
> /* Same eventdev loop for single and burst of vector */
>@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
> struct rte_event events[MAX_PKT_BURST];
> int i, nb_enq = 0, nb_deq = 0;
> struct lcore_conf *lconf;
>+ uint16_t *dst_port_list;
> unsigned int lcore_id;
>
> if (event_p_id < 0)
>@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
>
> lcore_id = rte_lcore_id();
> lconf = &lcore_conf[lcore_id];
>-
>+ dst_port_list =
>+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
>+ RTE_CACHE_LINE_SIZE);
>+ if (dst_port_list == NULL)
>+ return;
> RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
>
> while (!force_quit) {
>@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
> events[i].op = RTE_EVENT_OP_FORWARD;
> }
>
>- lpm_process_event_vector(events[i].vec, lconf);
>-
>- if (flags & L3FWD_EVENT_TX_DIRECT)
>- event_vector_txq_set(events[i].vec, 0);
>+ lpm_process_event_vector(events[i].vec, lconf,
>+ dst_port_list);
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
>
> l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
> nb_deq, 1);
>+ rte_free(dst_port_list);
> }
>
> int __rte_noinline
>diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
>index ce515e0bc4..bf365341fb 100644
>--- a/examples/l3fwd/l3fwd_neon.h
>+++ b/examples/l3fwd/l3fwd_neon.h
>@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
> }
> }
>
>+static __rte_always_inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ uint16_t i = 0;
>+
>+#if defined(RTE_ARCH_ARM64)
>+ uint16_t res;
>+
>+ while (nb_elem > 7) {
>+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
>+ uint16x8_t dp1;
>+
>+ dp1 = vld1q_u16(&dst_ports[i]);
>+ dp1 = vceqq_u16(dp1, dp);
>+ res = vminvq_u16(dp1);
>+ if (!res)
>+ return BAD_PORT;
>+
>+ nb_elem -= 8;
>+ i += 8;
>+ }
>+
>+ while (nb_elem > 3) {
>+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
>+ uint16x4_t dp1;
>+
>+ dp1 = vld1_u16(&dst_ports[i]);
>+ dp1 = vceq_u16(dp1, dp);
>+ res = vminv_u16(dp1);
>+ if (!res)
>+ return BAD_PORT;
>+
>+ nb_elem -= 4;
>+ i += 4;
>+ }
>+#endif
>+
>+ while (nb_elem) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ nb_elem--;
>+ i++;
>+ }
>+
>+ return dst_ports[0];
>+}
>+
> #endif /* _L3FWD_NEON_H_ */
>diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
>index 0f0d0323a2..083729cdef 100644
>--- a/examples/l3fwd/l3fwd_sse.h
>+++ b/examples/l3fwd/l3fwd_sse.h
>@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
> }
> }
>
>+static __rte_always_inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ uint16_t i = 0, res;
>+
>+ while (nb_elem > 7) {
>+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
>+ __m128i dp1;
>+
>+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
>+ dp1 = _mm_cmpeq_epi16(dp1, dp);
>+ res = _mm_movemask_epi8(dp1);
>+ if (res != 0xFFFF)
>+ return BAD_PORT;
>+
>+ nb_elem -= 8;
>+ i += 8;
>+ }
>+
>+ while (nb_elem > 3) {
>+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
>+ __m128i dp1;
>+
>+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
>+ dp1 = _mm_cmpeq_epi16(dp1, dp);
>+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
>+ res = _mm_movemask_ps((__m128)dp1);
>+ if (res != 0xF)
>+ return BAD_PORT;
>+
>+ nb_elem -= 4;
>+ i += 4;
>+ }
>+
>+ while (nb_elem) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ nb_elem--;
>+ i++;
>+ }
>+
>+ return dst_ports[0];
>+}
>+
> #endif /* _L3FWD_SSE_H_ */
>--
>2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
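
All three SIMD implementations of `process_dst_port()` above honour the same contract: return `dst_ports[0]` when every element equals it, otherwise `BAD_PORT`, so `process_event_vector()` can decide in one shot whether the whole vector may share `vec->port`. A scalar reference of that contract (a sketch; l3fwd defines its own `BAD_PORT` sentinel):

    #include <stdint.h>

    #define BAD_PORT UINT16_MAX  /* assumed sentinel for illustration */

    static uint16_t
    process_dst_port_scalar(const uint16_t *dst_ports, uint16_t nb_elem)
    {
            /* Any mismatch against the first entry poisons the result. */
            for (uint16_t i = 1; i < nb_elem; i++)
                    if (dst_ports[i] != dst_ports[0])
                            return BAD_PORT;
            return dst_ports[0];
    }

The NEON/SSE/altivec loops are just wider versions of this test; the trailing scalar `while (nb_elem)` loop in each of them handles whatever the vector width leaves over.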
* RE: [EXT] [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib
2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-10-17 12:06 ` Shijith Thotton
0 siblings, 0 replies; 41+ messages in thread
From: Shijith Thotton @ 2022-10-17 12:06 UTC (permalink / raw)
To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran
Cc: dev, Pavan Nikhilesh Bhagavatula
>
>Fix stack overflow when event vector size is greater than
>MAX_BURST_SIZE.
>Add missing mac swap and rfc1812 stage.
>
>Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
>---
> examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++-----------
> 1 file changed, 91 insertions(+), 39 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
>index b82e0c0354..edc0dd69b9 100644
>--- a/examples/l3fwd/l3fwd_fib.c
>+++ b/examples/l3fwd/l3fwd_fib.c
>@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf,
> */
> #if !defined FIB_SEND_MULTI
> static inline void
>-fib_send_single(int nb_tx, struct lcore_conf *qconf,
>- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
>+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
> {
>- int32_t j;
> struct rte_ether_hdr *eth_hdr;
>
>- for (j = 0; j < nb_tx; j++) {
>- /* Run rfc1812 if packet is ipv4 and checks enabled. */
>+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
> #if defined DO_RFC_1812_CHECKS
>- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
>- pkts_burst[j], struct rte_ether_hdr *) + 1),
>- &hops[j], pkts_burst[j]->packet_type);
>+ rfc1812_process(
>+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
>+ pkt, struct rte_ether_hdr *) +
>+ 1),
>+ hop, pkt->packet_type);
> #endif
>
>- /* Set MAC addresses. */
>- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
>- struct rte_ether_hdr *);
>- *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]];
>- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
>- ð_hdr->src_addr);
>+ /* Set MAC addresses. */
>+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
>+ *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop];
>+ rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr);
>+}
>
>+static inline void
>+fib_send_single(int nb_tx, struct lcore_conf *qconf,
>+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
>+{
>+ int32_t j;
>+
>+ for (j = 0; j < nb_tx; j++) {
>+ process_packet(pkts_burst[j], &hops[j]);
>+ if (hops[j] == BAD_PORT) {
>+ rte_pktmbuf_free(pkts_burst[j]);
>+ continue;
>+ }
> /* Send single packet. */
> send_single_packet(qconf, pkts_burst[j], hops[j]);
> }
>@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
> uint32_t ipv4_arr[MAX_PKT_BURST];
> uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
> uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
>- uint16_t nh;
>+ uint16_t nh, hops[MAX_PKT_BURST];
> uint8_t type_arr[MAX_PKT_BURST];
> uint32_t ipv4_cnt, ipv6_cnt;
> uint32_t ipv4_arr_assem, ipv6_arr_assem;
>@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
> else
> nh = (uint16_t)hopsv6[ipv6_arr_assem++];
> if (nh != FIB_DEFAULT_HOP)
>- events[i].mbuf->port = nh;
>+ hops[i] = nh != FIB_DEFAULT_HOP ?
>+ nh :
>+ events[i].mbuf->port;
>+ process_packet(events[i].mbuf, &hops[i]);
>+ events[i].mbuf->port = hops[i] != BAD_PORT ?
>+ hops[i] :
>+ events[i].mbuf->port;
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
> }
>
> static __rte_always_inline void
>-fib_process_event_vector(struct rte_event_vector *vec)
>+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
>+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
>+ uint32_t *ipv4_arr, uint16_t *hops)
> {
>- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
>- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
> uint32_t ipv4_arr_assem, ipv6_arr_assem;
> struct rte_mbuf **mbufs = vec->mbufs;
>- uint32_t ipv4_arr[MAX_PKT_BURST];
>- uint8_t type_arr[MAX_PKT_BURST];
> uint32_t ipv4_cnt, ipv6_cnt;
> struct lcore_conf *lconf;
> uint16_t nh;
>@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
>
> /* Lookup IPv6 hops if IPv6 packets are present. */
> if (ipv6_cnt > 0)
>- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
>- hopsv6, ipv6_cnt);
>-
>- if (vec->attr_valid) {
>- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
>- if (nh != FIB_DEFAULT_HOP)
>- vec->port = nh;
>- else
>- vec->attr_valid = 0;
>- }
>+ rte_fib6_lookup_bulk(
>+ lconf->ipv6_lookup_struct,
>+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
>+ ipv6_cnt);
>
> /* Assign ports looked up in fib depending on IPv4 or IPv6 */
> for (i = 0; i < vec->nb_elem; i++) {
>@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
> else
> nh = (uint16_t)hopsv6[ipv6_arr_assem++];
> if (nh != FIB_DEFAULT_HOP)
>- mbufs[i]->port = nh;
>- event_vector_attr_validate(vec, mbufs[i]);
>+ hops[i] = nh;
>+ else
>+ hops[i] = vec->attr_valid ? vec->port :
>+ vec->mbufs[i]->port;
> }
>+
>+#if defined FIB_SEND_MULTI
>+ uint16_t k;
>+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
>+
>+ for (i = 0; i != k; i += FWDSTEP)
>+ processx4_step3(&vec->mbufs[i], &hops[i]);
>+ for (; i < vec->nb_elem; i++)
>+ process_packet(vec->mbufs[i], &hops[i]);
>+#else
>+ for (i = 0; i < vec->nb_elem; i++)
>+ process_packet(vec->mbufs[i], &hops[i]);
>+#endif
>+
>+ process_event_vector(vec, hops);
> }
>
> static __rte_always_inline void
>@@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
> const uint8_t event_d_id = evt_rsrc->event_d_id;
> const uint16_t deq_len = evt_rsrc->deq_depth;
> struct rte_event events[MAX_PKT_BURST];
>+ uint8_t *type_arr, **ipv6_arr, *ptr;
> int nb_enq = 0, nb_deq = 0, i;
>-
>- if (event_p_id < 0)
>+ uint64_t *hopsv4, *hopsv6;
>+ uint32_t *ipv4_arr;
>+ uint16_t *hops;
>+ uintptr_t mem;
>+
>+ mem = (uintptr_t)rte_zmalloc(
>+ "vector_fib",
>+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
>+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
>+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
>+ evt_rsrc->vector_size,
>+ RTE_CACHE_LINE_SIZE);
>+ if (mem == 0)
> return;
>+ ipv4_arr = (uint32_t *)mem;
>+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
>+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
>+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
>+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
>+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
>+
>+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
>+ for (i = 0; i < evt_rsrc->vector_size; i++)
>+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i];
>+
>+ if (event_p_id < 0) {
>+ rte_free((void *)mem);
>+ return;
>+ }
>
> RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__,
> rte_lcore_id());
>@@ -519,10 +571,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
> events[i].op = RTE_EVENT_OP_FORWARD;
> }
>
>- fib_process_event_vector(events[i].vec);
>-
>- if (flags & L3FWD_EVENT_TX_DIRECT)
>- event_vector_txq_set(events[i].vec, 0);
>+ fib_process_event_vector(events[i].vec, type_arr,
>+ ipv6_arr, hopsv4, hopsv6,
>+ ipv4_arr, hops);
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>@@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
>
> l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
> nb_deq, 1);
>+ rte_free((void *)mem);
> }
>
> int __rte_noinline
>--
>2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
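
The allocation in `fib_event_loop_vector()` above packs six per-vector scratch arrays into one `rte_zmalloc()` and carves them out by walking a pointer, so a single allocation (and a single `rte_free()`) covers them all. The carving pattern in isolation, reduced to plain libc and two arrays (hypothetical names and sizes, not the fib code):

    #include <stdint.h>
    #include <stdlib.h>

    struct scratch {
            uint32_t *ipv4;  /* n elements */
            uint16_t *hops;  /* n elements, placed right after ipv4[] */
            void *mem;       /* single backing block */
    };

    static int
    scratch_init(struct scratch *s, uint16_t n)
    {
            /* One zeroed block large enough for both arrays. */
            s->mem = calloc(n, sizeof(uint32_t) + sizeof(uint16_t));
            if (s->mem == NULL)
                    return -1;
            s->ipv4 = (uint32_t *)s->mem;
            s->hops = (uint16_t *)&s->ipv4[n]; /* carve the next array */
            return 0;
    }

A caveat of the pattern: each carved pointer's alignment depends on the total size of the arrays placed before it, so ordering the widest element types first is the safer layout.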
* RE: [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation
2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton
@ 2022-10-20 16:15 ` Pavan Nikhilesh Bhagavatula
0 siblings, 0 replies; 41+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2022-10-20 16:15 UTC (permalink / raw)
To: Shijith Thotton, Jerin Jacob Kollanukkaran, David Christensen; +Cc: dev, stable
> -----Original Message-----
> From: Shijith Thotton <sthotton@marvell.com>
> Sent: Monday, October 17, 2022 5:36 PM
> To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>; Jerin Jacob
> Kollanukkaran <jerinj@marvell.com>; David Christensen
> <drc@linux.vnet.ibm.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>; stable@dpdk.org
> Subject: RE: [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask
> generation
>
> >
> >Fix port group mask generation in altivec, vec_any_eq returns
> >0 or 1 while port_groupx4 expects comparison mask result.
> >
> >Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
> >Cc: stable@dpdk.org
> >
> >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Acked-by: Shijith Thotton <sthotton@marvell.com>
>
Thomas,
Will this series make it into the 22.11 release?
> >---
> > v5 Changes:
> > - Fix compilation errors.
> >
> > v4 Changes:
> > - Fix missing `rte_free`.
> >
> > v3 Changes:
> > - PPC optimize port mask generation.
> > - Fix aarch32 compilation.
> >
> > v2 Changes:
> > - Fix PPC, RISC-V, aarch32 compilation.
> >
> > examples/common/altivec/port_group.h | 11 ++++++++---
> > 1 file changed, 8 insertions(+), 3 deletions(-)
> >
> >diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
> >index 5e209b02fa..1c05bc025a 100644
> >--- a/examples/common/altivec/port_group.h
> >+++ b/examples/common/altivec/port_group.h
> >@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
> > uint16_t u16[FWDSTEP + 1];
> > uint64_t u64;
> > } *pnum = (void *)pn;
> >-
> >+ __vector unsigned long long result;
> >+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080,
> >+ 0x80808080, 0x80808080};
> > int32_t v;
> >
> >- v = vec_any_eq(dp1, dp2);
> >-
> >+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
> >+ dp1 = vec_mergeh(dp1, dp1);
> >+ result = (__vector unsigned long long)vec_vbpermq(
> >+ (__vector unsigned char)dp1, (__vector unsigned char)perm_mask);
> >
> >+ v = result[1];
> > /* update last port counter. */
> > lp[0] += gptbl[v].lpv;
> >
> >--
> >2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v6 1/5] examples/l3fwd: fix port group mask generation
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
` (4 preceding siblings ...)
2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton
@ 2022-10-25 16:05 ` pbhagavatula
2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula
` (4 more replies)
5 siblings, 5 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw)
To: jerinj, thomas, David Christensen
Cc: dev, Pavan Nikhilesh, stable, Shijith Thotton
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix port group mask generation in altivec, vec_any_eq returns
0 or 1 while port_groupx4 expects comparison mask result.
Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
---
v6 Changes:
- Minor optimization to process_dst_port for NEON.
v5 Changes:
- Fix compilation errors.
v4 Changes:
- Fix missing `rte_free`.
v3 Changes:
- PPC optimize port mask generation.
- Fix aarch32 compilation.
v2 Changes:
- Fix PPC, RISC-V, aarch32 compilation.
examples/common/altivec/port_group.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..1c05bc025a 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
uint16_t u16[FWDSTEP + 1];
uint64_t u64;
} *pnum = (void *)pn;
-
+ __vector unsigned long long result;
+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080,
+ 0x80808080, 0x80808080};
int32_t v;
- v = vec_any_eq(dp1, dp2);
-
+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
+ dp1 = vec_mergeh(dp1, dp1);
+ result = (__vector unsigned long long)vec_vbpermq(
+ (__vector unsigned char)dp1, (__vector unsigned char)perm_mask);
+ v = result[1];
/* update last port counter. */
lp[0] += gptbl[v].lpv;
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
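
The root cause here is a semantic mismatch rather than a miscompilation: `port_groupx4()` indexes its 16-entry `gptbl[]` with a 4-bit mask in which bit i records whether the destination ports in lane i of the two shifted vectors are equal, while `vec_any_eq()` collapses the whole comparison into a single 0/1 answer. A scalar model of the mask the fixed code computes (illustration only; the real code derives it with `vec_cmpeq`/`vec_vbpermq`):

    #include <stdint.h>

    /* Build the 4-bit equality mask port_groupx4() expects:
     * bit i set  <=>  dp1[i] == dp2[i]. */
    static int
    port_mask4(const uint16_t dp1[4], const uint16_t dp2[4])
    {
            int v = 0;

            for (int i = 0; i < 4; i++)
                    v |= (dp1[i] == dp2[i]) << i;
            return v; /* 0..15, a valid gptbl[] index */
    }

With `vec_any_eq()` the function only ever saw indexes 0 or 1, so packets whose neighbouring destination ports matched beyond the first lane were never grouped together.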
* [PATCH v6 2/5] examples/l3fwd: split processing and send stages
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
@ 2022-10-25 16:05 ` pbhagavatula
2022-10-25 16:05 ` [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
` (3 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw)
To: jerinj, thomas, David Christensen, Ruifeng Wang,
Bruce Richardson, Konstantin Ananyev
Cc: dev, Pavan Nikhilesh, Shijith Thotton
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Split packet processing from packet send stage, as send stage
is not common for poll and event mode.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
---
examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++-----------
examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++-------
examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++---
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return portid;
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t *dst_port, uint16_t portid,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i, j, pos;
- uint16_t dst_port[MAX_PKT_BURST];
/*
* Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
dst_port[j + i] = em_get_dst_port(qconf,
pkts_burst[j + i], portid);
}
+
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
}
- for (; j < nb_rx; j++)
+ for (; j < nb_rx; j++) {
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &pkts_burst[j]->port);
+ }
+}
- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
/*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+ for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *) + 1);
- }
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
}
continue;
}
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
ipv4_flag[j / FWDSTEP],
portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
}
}
-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t i = 0, j = 0;
- uint16_t dst_port[MAX_PKT_BURST];
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
void *));
}
-
for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
for (i = 0; i < FWDSTEP; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
&pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
}
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
&dst_port[j]);
+ if (do_step3)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
j += FWDSTEP;
}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
void *));
j++;
}
-
j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fallthrough */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
}
}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
* from main_loop.
*/
static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
- uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, uint16_t *dst_port,
+ struct lcore_conf *qconf, const uint8_t do_step3)
{
int32_t j;
- uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
processx4_step2(qconf, dip[j / FWDSTEP],
ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+ if (do_step3)
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
/* Classify last up to 3 packets one by one */
switch (nb_rx % FWDSTEP) {
case 3:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 2:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
/* fall-through */
case 1:
dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ if (do_step3)
+ process_packet(pkts_burst[j], &dst_port[j]);
j++;
}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+ struct lcore_conf *qconf)
+{
+ uint16_t dst_port[MAX_PKT_BURST];
+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+ 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
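
After the split, the wrappers keep the poll-mode behaviour unchanged while the event path added in the next patch reuses the same processing core. The two call shapes, as they appear in this series:

    /* Poll mode: classify only (do_step3 = 0); send_packets_multi()
     * performs the header rewrite in the send stage. */
    l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port,
                              qconf, 0);
    send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);

    /* Event mode (patch 3/5): rewrite in-line (do_step3 = 1); there is
     * no separate send stage before the Tx adapter. */
    l3fwd_lpm_process_packets(vec->nb_elem, vec->mbufs, vec->port,
                              dst_port, lconf, 1);

Because `do_step3` is a `const uint8_t` resolved at inlining time, the extra argument costs the poll path nothing.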
* [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula
@ 2022-10-25 16:05 ` pbhagavatula
2022-10-25 16:05 ` [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
` (2 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw)
To: jerinj, thomas, David Christensen, Ruifeng Wang,
Bruce Richardson, Konstantin Ananyev
Cc: dev, Pavan Nikhilesh, Shijith Thotton
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use lpm vector path to process event vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
---
examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++--------
examples/l3fwd/l3fwd_neon.h | 48 +++++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
5 files changed, 215 insertions(+), 16 deletions(-)
diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp1;
+ __vector unsigned short dp;
+
+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ int i;
+
+ for (i = 0; i < nb_elem; i++) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ }
+
+ return dst_ports[0];
+}
+#endif
+
static inline void
event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
{
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index 22d7f61a42..5172979c72 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_port_list);
}
int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..40807d5965 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,52 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0;
+
+#if defined(RTE_ARCH_ARM64)
+ uint64_t res;
+
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(dp1, 4)),
+ 0);
+ if (res != ~0ULL)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vget_lane_u64(vreinterpret_u64_u16(dp1), 0);
+ if (res != ~0ULL)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+#endif
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
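
The v6 delta in the NEON version swaps `vminvq_u16()` for a narrowing shift: `vshrn_n_u16(dp1, 4)` shifts each 16-bit compare lane (0x0000 or 0xFFFF from `vceqq_u16`) right by four and keeps the low byte, packing all eight lanes into one 64-bit scalar that a single `!= ~0ULL` test can reject. A scalar model of that reduction (a sketch):

    #include <stdint.h>

    /* Models vshrn_n_u16(mask, 4) followed by vget_lane_u64(): each
     * lane is 0x0000 or 0xFFFF, so (lane >> 4) truncated to a byte is
     * 0x00 or 0xFF. */
    static uint64_t
    narrow_mask(const uint16_t lanes[8])
    {
            uint64_t out = 0;

            for (int i = 0; i < 8; i++)
                    out |= (uint64_t)(uint8_t)(lanes[i] >> 4) << (8 * i);
            return out; /* ~0ULL iff all eight lanes matched */
    }

One narrowing shift plus a scalar compare is generally cheaper than a cross-lane minimum on Arm cores, which is presumably the "minor optimization" called out in the v6 changelog.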
* [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-10-25 16:05 ` [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
@ 2022-10-25 16:05 ` pbhagavatula
2022-10-25 16:05 ` [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-31 14:52 ` [PATCH v6 1/5] examples/l3fwd: fix port group mask generation Thomas Monjalon
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw)
To: jerinj, thomas; +Cc: dev, Pavan Nikhilesh, Shijith Thotton
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Fix stack overflow when event vector size is greater than
MAX_BURST_SIZE.
Add missing mac swap and rfc1812 stage.
Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
---
examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++-----------
1 file changed, 91 insertions(+), 39 deletions(-)
diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index b82e0c0354..edc0dd69b9 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf,
*/
#if !defined FIB_SEND_MULTI
static inline void
-fib_send_single(int nb_tx, struct lcore_conf *qconf,
- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+process_packet(struct rte_mbuf *pkt, uint16_t *hop)
{
- int32_t j;
struct rte_ether_hdr *eth_hdr;
- for (j = 0; j < nb_tx; j++) {
- /* Run rfc1812 if packet is ipv4 and checks enabled. */
+ /* Run rfc1812 if packet is ipv4 and checks enabled. */
#if defined DO_RFC_1812_CHECKS
- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
- pkts_burst[j], struct rte_ether_hdr *) + 1),
- &hops[j], pkts_burst[j]->packet_type);
+ rfc1812_process(
+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
+ pkt, struct rte_ether_hdr *) +
+ 1),
+ hop, pkt->packet_type);
#endif
- /* Set MAC addresses. */
- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
- struct rte_ether_hdr *);
- *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]];
- rte_ether_addr_copy(&ports_eth_addr[hops[j]],
- ð_hdr->src_addr);
+ /* Set MAC addresses. */
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+ *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop];
+ rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr);
+}
+static inline void
+fib_send_single(int nb_tx, struct lcore_conf *qconf,
+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
+{
+ int32_t j;
+
+ for (j = 0; j < nb_tx; j++) {
+ process_packet(pkts_burst[j], &hops[j]);
+ if (hops[j] == BAD_PORT) {
+ rte_pktmbuf_free(pkts_burst[j]);
+ continue;
+ }
/* Send single packet. */
send_single_packet(qconf, pkts_burst[j], hops[j]);
}
@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
uint32_t ipv4_arr[MAX_PKT_BURST];
uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
- uint16_t nh;
+ uint16_t nh, hops[MAX_PKT_BURST];
uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- events[i].mbuf->port = nh;
+ hops[i] = nh != FIB_DEFAULT_HOP ?
+ nh :
+ events[i].mbuf->port;
+ process_packet(events[i].mbuf, &hops[i]);
+ events[i].mbuf->port = hops[i] != BAD_PORT ?
+ hops[i] :
+ events[i].mbuf->port;
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+ uint32_t *ipv4_arr, uint16_t *hops)
{
- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
uint32_t ipv4_arr_assem, ipv6_arr_assem;
struct rte_mbuf **mbufs = vec->mbufs;
- uint32_t ipv4_arr[MAX_PKT_BURST];
- uint8_t type_arr[MAX_PKT_BURST];
uint32_t ipv4_cnt, ipv6_cnt;
struct lcore_conf *lconf;
uint16_t nh;
@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec)
/* Lookup IPv6 hops if IPv6 packets are present. */
if (ipv6_cnt > 0)
- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
- hopsv6, ipv6_cnt);
-
- if (vec->attr_valid) {
- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
- if (nh != FIB_DEFAULT_HOP)
- vec->port = nh;
- else
- vec->attr_valid = 0;
- }
+ rte_fib6_lookup_bulk(
+ lconf->ipv6_lookup_struct,
+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+ ipv6_cnt);
/* Assign ports looked up in fib depending on IPv4 or IPv6 */
for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
else
nh = (uint16_t)hopsv6[ipv6_arr_assem++];
if (nh != FIB_DEFAULT_HOP)
- mbufs[i]->port = nh;
- event_vector_attr_validate(vec, mbufs[i]);
+ hops[i] = nh;
+ else
+ hops[i] = vec->attr_valid ? vec->port :
+ vec->mbufs[i]->port;
}
+
+#if defined FIB_SEND_MULTI
+ uint16_t k;
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
}
static __rte_always_inline void
@@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
const uint8_t event_d_id = evt_rsrc->event_d_id;
const uint16_t deq_len = evt_rsrc->deq_depth;
struct rte_event events[MAX_PKT_BURST];
+ uint8_t *type_arr, **ipv6_arr, *ptr;
int nb_enq = 0, nb_deq = 0, i;
-
- if (event_p_id < 0)
+ uint64_t *hopsv4, *hopsv6;
+ uint32_t *ipv4_arr;
+ uint16_t *hops;
+ uintptr_t mem;
+
+ mem = (uintptr_t)rte_zmalloc(
+ "vector_fib",
+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+ evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (mem == 0)
return;
+ ipv4_arr = (uint32_t *)mem;
+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+ for (i = 0; i < evt_rsrc->vector_size; i++)
+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i];
+
+ if (event_p_id < 0) {
+ rte_free((void *)mem);
+ return;
+ }
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__,
rte_lcore_id());
@@ -519,10 +571,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- fib_process_event_vector(events[i].vec);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ fib_process_event_vector(events[i].vec, type_arr,
+ ipv6_arr, hopsv4, hopsv6,
+ ipv4_arr, hops);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free((void *)mem);
}
int __rte_noinline
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
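
The overflow being fixed is a sizing mismatch: the old `fib_process_event_vector()` kept `MAX_PKT_BURST`-sized arrays on its stack but filled them up to `vec->nb_elem`, which is bounded by the configured event vector size and can be larger. The fix moves the scratch storage to the heap, sized by `vector_size`, allocated once per worker and reused for every dequeued vector. The lifetime in outline (a sketch of the shape, not the full fib worker):

    #include <stdint.h>
    #include <rte_common.h>
    #include <rte_malloc.h>

    static int
    fib_worker_scratch(uint16_t vector_size)
    {
            /* Sized by the event vector size, not MAX_PKT_BURST. */
            uint16_t *hops = rte_zmalloc("fib_hops",
                                         sizeof(uint16_t) * vector_size,
                                         RTE_CACHE_LINE_SIZE);

            if (hops == NULL)
                    return -1;
            /* ... dequeue loop indexes hops[0..vec->nb_elem - 1] ... */
            rte_free(hops);
            return 0;
    }

The same reasoning drives the `dst_port_list`/`dst_ports` allocations in patches 3/5 and 5/5.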
* [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
` (2 preceding siblings ...)
2022-10-25 16:05 ` [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
@ 2022-10-25 16:05 ` pbhagavatula
2022-10-31 14:52 ` [PATCH v6 1/5] examples/l3fwd: fix port group mask generation Thomas Monjalon
4 siblings, 0 replies; 41+ messages in thread
From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw)
To: jerinj, thomas; +Cc: dev, Pavan Nikhilesh, Shijith Thotton
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use em vector path to process event vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
---
examples/l3fwd/l3fwd_em.c | 13 +++--
examples/l3fwd/l3fwd_em.h | 29 +++++------
examples/l3fwd/l3fwd_em_hlm.h | 72 +++++-----------------------
examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
examples/l3fwd/l3fwd_event.h | 21 --------
5 files changed, 48 insertions(+), 112 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index a203dc9e46..35de31157e 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
unsigned int lcore_id;
+ uint16_t *dst_ports;
if (event_p_id < 0)
return;
+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_ports == NULL)
+ return;
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
@@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
}
#if defined RTE_ARCH_X86 || defined __ARM_NEON
- l3fwd_em_process_event_vector(events[i].vec, lconf);
+ l3fwd_em_process_event_vector(events[i].vec, lconf,
+ dst_ports);
#else
l3fwd_em_no_opt_process_event_vector(events[i].vec,
- lconf);
+ lconf, dst_ports);
#endif
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_ports);
}
int __rte_noinline
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
index fe2ee59f6a..7d051fc076 100644
--- a/examples/l3fwd/l3fwd_em.h
+++ b/examples/l3fwd/l3fwd_em.h
@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
}
}
-static __rte_always_inline void
+static __rte_always_inline uint16_t
l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
{
struct rte_ether_hdr *eth_hdr;
@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
else
m->port = BAD_PORT;
+
+ return m->port;
}
/*
@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
static inline void
l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf,
+ uint16_t *dst_ports)
{
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i;
@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
- /* Process first packet to init vector attributes */
- l3fwd_em_simple_process(mbufs[0], qconf);
- if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
- }
-
/*
* Prefetch and forward already prefetched packets.
*/
- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
rte_prefetch0(
rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *));
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
}
/* Forward remaining prefetched packets */
- for (; i < vec->nb_elem; i++) {
- l3fwd_em_simple_process(mbufs[i], qconf);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ for (; i < vec->nb_elem; i++)
+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_port)
{
- struct rte_mbuf **mbufs = vec->mbufs;
- uint16_t dst_port[MAX_PKT_BURST];
- int32_t i, j, n, pos;
-
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
- rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+ uint16_t i;
if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
- uint32_t pkt_type =
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
- uint32_t l3_type, tcp_or_udp;
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
- pkt_type &= mbufs[j + i]->packet_type;
-
- l3_type = pkt_type & RTE_PTYPE_L3_MASK;
- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
- i++, pos++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
- struct rte_ether_hdr *) +
- 1);
- }
-
- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
- &dst_port[j]);
- } else {
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port =
- em_get_dst_port(qconf, mbufs[j + i],
- mbufs[j + i]->port);
- process_packet(mbufs[j + i],
- &mbufs[j + i]->port);
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- continue;
- }
- processx4_step3(&mbufs[j], &dst_port[j]);
-
- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
- mbufs[j + i]->port = dst_port[j + i];
- event_vector_attr_validate(vec, mbufs[j + i]);
- }
- }
-
- for (; j < vec->nb_elem; j++) {
- mbufs[j]->port =
- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
- process_packet(mbufs[j], &mbufs[j]->port);
- event_vector_attr_validate(vec, mbufs[j]);
- }
+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+ vec->port, qconf, 1);
+ else
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_em_process_packets(1, &vec->mbufs[i],
+ &dst_port[i],
+ vec->mbufs[i]->port, qconf, 1);
+
+ process_event_vector(vec, dst_port);
}
#endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,
for (i = 1, j = 0; j < nb_rx; i++, j++) {
struct rte_mbuf *mbuf = events[j]->mbuf;
+ uint16_t port;
if (i < nb_rx) {
rte_prefetch0(rte_pktmbuf_mtod(
events[i]->mbuf,
struct rte_ether_hdr *) + 1);
}
+ port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
}
}
static inline void
l3fwd_em_process_event_vector(struct rte_event_vector *vec,
- struct lcore_conf *qconf)
+ struct lcore_conf *qconf, uint16_t *dst_ports)
{
+ const uint8_t attr_valid = vec->attr_valid;
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
- if (vec->attr_valid)
- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
if (j < vec->nb_elem)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
struct rte_ether_hdr *) +
1);
- mbufs[i]->port =
- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
- process_packet(mbufs[i], &mbufs[i]->port);
- event_vector_attr_validate(vec, mbufs[i]);
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
}
+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != j; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &dst_ports[i]);
+
+ process_event_vector(vec, dst_ports);
}
#endif /* __L3FWD_EM_SEQUENTIAL_H__ */
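The RTE_ALIGN_FLOOR() split above is the usual bulk-plus-remainder idiom: round nb_elem down to a multiple of FWDSTEP, run the four-wide processx4_step3() over the aligned prefix, then finish the tail packet by packet. A worked fragment (assuming FWDSTEP == 4 and that mbufs/dst_ports are already in scope, as in the function above):

/* With nb_elem == 10: j = RTE_ALIGN_FLOOR(10, 4) == 8, so packets
 * 0..7 take the 4-wide path and packets 8..9 the scalar remainder.
 */
uint16_t nb_elem = 10;
uint16_t i, j = RTE_ALIGN_FLOOR(nb_elem, FWDSTEP);

for (i = 0; i != j; i += FWDSTEP)
	processx4_step3(&mbufs[i], &dst_ports[i]);	/* packets 0..7 */
for (; i < nb_elem; i++)
	process_packet(mbufs[i], &dst_ports[i]);	/* packets 8..9 */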
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 3fe38aada0..e21817c36b 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
}
#endif
-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
- /* l3fwd application only changes mbuf port while processing */
- if (vec->attr_valid && (vec->port != mbuf->port))
- vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
- if (vec->attr_valid) {
- vec->queue = txq;
- } else {
- int i;
-
- for (i = 0; i < vec->nb_elem; i++)
- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
- }
-}
-
static inline uint16_t
filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
uint16_t nb_pkts)
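event_vector_attr_validate() and event_vector_txq_set() become dead code with this series: lookup no longer rewrites mbuf->port in place, so there is no per-mbuf moment at which the vector attribute could be invalidated. The per-packet results live in dst_port[] instead, and a single pass can decide whether the whole vector still shares one destination. A minimal sketch of such a scalar pass, matching the process_dst_port() signature visible in the hunk context above (the body is an assumption, not the patch's actual implementation):

static inline uint16_t
process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
{
	uint16_t i;

	/* Return the port common to all entries, or BAD_PORT if they
	 * disagree, so the caller can drop vec->attr_valid.
	 */
	for (i = 0; i < nb_elem; i++)
		if (dst_ports[i] != dst_ports[0])
			return BAD_PORT;

	return dst_ports[0];
}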
--
2.25.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH v6 1/5] examples/l3fwd: fix port group mask generation
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
` (3 preceding siblings ...)
2022-10-25 16:05 ` [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
@ 2022-10-31 14:52 ` Thomas Monjalon
4 siblings, 0 replies; 41+ messages in thread
From: Thomas Monjalon @ 2022-10-31 14:52 UTC (permalink / raw)
To: Pavan Nikhilesh
Cc: jerinj, David Christensen, stable, dev, Shijith Thotton
25/10/2022 18:05, pbhagavatula@marvell.com:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Fix port group mask generation in altivec, vec_any_eq returns
> 0 or 1 while port_groupx4 expects comparison mask result.
>
> Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
> Cc: stable@dpdk.org
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Acked-by: Shijith Thotton <sthotton@marvell.com>
Series applied, thanks.
^ permalink raw reply [flat|nested] 41+ messages in thread
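The one-line summary in the applied fix is worth unpacking: vec_any_eq() collapses a whole-vector comparison into a single boolean, while port_groupx4() indexes its grouping table with a 4-bit mask, one bit per packet of the FWDSTEP group. A scalar model of the difference (illustrative values only; dp1/dp2 are assumed to hold consecutive destination ports so lane k compares neighbouring packets):

uint16_t dp1[4] = {1, 1, 2, 2};	/* example destination ports */
uint16_t dp2[4] = {1, 2, 2, 3};	/* the same ports shifted by one */
int v = 0, k;

/* Bit k of v is set when lane k matches; this is the mask shape
 * port_groupx4() expects, built here without vector intrinsics.
 */
for (k = 0; k < 4; k++)
	v |= (dp1[k] == dp2[k]) << k;
/* v == 0x5: lanes 0 and 2 match. vec_any_eq() would have returned
 * just 1, the mask for "only lane 0 equal", grouping incorrectly.
 */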
Thread overview: 41+ messages
2022-08-29 9:44 [PATCH 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-08-29 9:44 ` [PATCH 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-08-29 9:44 ` [PATCH 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
2022-08-29 9:44 ` [PATCH 4/5] examples/l3fwd: use em " pbhagavatula
2022-08-29 9:44 ` [PATCH 5/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-09-02 9:18 ` [PATCH v2 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-09-02 9:18 ` [PATCH v2 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
2022-09-02 9:18 ` [PATCH v2 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-09-02 9:18 ` [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen
2022-09-09 5:56 ` [EXT] " Pavan Nikhilesh Bhagavatula
2022-09-11 18:12 ` [PATCH v3 " pbhagavatula
2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-09-11 18:12 ` [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-10-07 20:03 ` [EXT] " Shijith Thotton
2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-07 20:01 ` [EXT] " Shijith Thotton
2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-10-11 9:08 ` [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
2022-10-11 9:08 ` [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-10-11 9:08 ` [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula
2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-10-17 12:06 ` [EXT] " Shijith Thotton
2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
2022-10-17 12:06 ` [EXT] " Shijith Thotton
2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-10-17 12:06 ` [EXT] " Shijith Thotton
2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-12 8:57 ` [EXT] " Shijith Thotton
2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton
2022-10-20 16:15 ` Pavan Nikhilesh Bhagavatula
2022-10-25 16:05 ` [PATCH v6 " pbhagavatula
2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula
2022-10-25 16:05 ` [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula
2022-10-25 16:05 ` [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula
2022-10-25 16:05 ` [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula
2022-10-31 14:52 ` [PATCH v6 1/5] examples/l3fwd: fix port group mask generation Thomas Monjalon