* [PATCH 1/5] examples/l3fwd: fix port group mask generation
@ 2022-08-29  9:44 UTC
From: pbhagavatula
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Fix port group mask generation in altivec: vec_any_eq returns 0 or 1,
while port_groupx4 expects the comparison mask as its result.

Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/common/altivec/port_group.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..7a6ef390ff 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
 		uint16_t u16[FWDSTEP + 1];
 		uint64_t u64;
 	} *pnum = (void *)pn;
+	union u_vec {
+		__vector unsigned short v_us;
+		unsigned short s[8];
+	};
+	union u_vec res;
 	int32_t v;

-	v = vec_any_eq(dp1, dp2);
-
+	dp1 = vec_cmpeq(dp1, dp2);
+	res.v_us = dp1;
+	v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) |
+	    (res.s[3] & 0x8);

 	/* update last port counter. */
 	lp[0] += gptbl[v].lpv;
--
2.25.1
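A minimal standalone sketch (not part of the patch) of the semantic
difference being fixed: vec_any_eq() collapses the whole comparison into a
single 0/1 scalar, so the gptbl[] index derived from it can only ever be 0
or 1, while vec_cmpeq() keeps an all-ones/all-zeros result per lane, from
which port_groupx4 can assemble its 4-bit mask. The dp1/dp2 values below
are made up for illustration; builds with gcc -maltivec on POWER.

	#include <altivec.h>
	#include <stdio.h>

	int
	main(void)
	{
		__vector unsigned short dp1 = {1, 1, 2, 2, 0, 0, 0, 0};
		__vector unsigned short dp2 = {1, 2, 2, 3, 0, 0, 0, 0};
		union {
			__vector unsigned short v;
			unsigned short s[8];
		} res;
		int v;

		/* Old code: 1 if *any* lane matches - not a lane mask. */
		printf("vec_any_eq = %d\n", vec_any_eq(dp1, dp2));

		/* Fixed code: per-lane mask, one bit kept per lane. */
		res.v = (__vector unsigned short)vec_cmpeq(dp1, dp2);
		v = (res.s[0] & 0x1) | (res.s[1] & 0x2) |
		    (res.s[2] & 0x4) | (res.s[3] & 0x8);
		printf("lane mask  = 0x%x\n", v); /* 0x5: lanes 0, 2 match */
		return 0;
	}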
* [PATCH 2/5] examples/l3fwd: split processing and send stages
@ 2022-08-29  9:44 UTC
From: pbhagavatula
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
    Konstantin Ananyev
Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Split the packet processing stage from the packet send stage, as the
send stage is not common to poll and event mode.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_em_hlm.h      | 39 +++++++++++++++++++-----------
 examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
 examples/l3fwd/l3fwd_lpm_neon.h    | 35 ++++++++++++++++++++-------
 examples/l3fwd/l3fwd_lpm_sse.h     | 25 ++++++++++++++++---
 4 files changed, 95 insertions(+), 29 deletions(-)

diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
 	return portid;
 }

-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
 static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			 uint16_t *dst_port, uint16_t portid,
+			 struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t i, j, pos;
-	uint16_t dst_port[MAX_PKT_BURST];

 	/*
 	 * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 			dst_port[j + i] = em_get_dst_port(qconf,
 					pkts_burst[j + i], portid);
 		}
+
+		for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+			processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
 	}

-	for (; j < nb_rx; j++)
+	for (; j < nb_rx; j++) {
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &pkts_burst[j]->port);
+	}
+}

-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+		      struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }

 /*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
 	 */
 	int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);

-	for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+	for (j = 0; j < nb_rx; j++)
 		pkts_burst[j] = ev[j]->mbuf;
-		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
-					       struct rte_ether_hdr *) + 1);
-	}

 	for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {

@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
 			}
 			continue;
 		}
-		processx4_step3(&pkts_burst[j], &dst_port[j]);
+		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+			processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);

 		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
 			pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
  * from main_loop.
  */
 static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			  uint8_t portid, uint16_t *dst_port,
+			  struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t j;
-	uint16_t dst_port[MAX_PKT_BURST];
 	__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 				ipv4_flag[j / FWDSTEP],
 				portid, &pkts_burst[j], &dst_port[j]);

+	if (do_step3)
+		for (j = 0; j != k; j += FWDSTEP)
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
 	/* Classify last up to 3 packets one by one */
 	switch (nb_rx % FWDSTEP) {
 	case 3:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 2:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 1:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+		       struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+				  0);
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
 	}
 }

-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
 static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			  uint16_t portid, uint16_t *dst_port,
+			  struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t i = 0, j = 0;
-	uint16_t dst_port[MAX_PKT_BURST];
 	int32x4_t dip;
 	uint32_t ipv4_flag;
 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 			rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *));
 		}

-
 		for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
 			for (i = 0; i < FWDSTEP; i++) {
 				rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 			processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
 			processx4_step2(qconf, dip, ipv4_flag, portid,
 					&pkts_burst[j], &dst_port[j]);
+			if (do_step3)
+				processx4_step3(&pkts_burst[j], &dst_port[j]);
 		}

 		processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
 		processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
 				&dst_port[j]);
+		if (do_step3)
+			processx4_step3(&pkts_burst[j], &dst_port[j]);

 		j += FWDSTEP;
 	}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 						       void *));
 			j++;
 		}
-
 		j -= m;
 		/* Classify last up to 3 packets one by one */
 		switch (m) {
 		case 3:
 			dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
 						       portid);
+			if (do_step3)
+				process_packet(pkts_burst[j], &dst_port[j]);
 			j++;
 			/* fallthrough */
 		case 2:
 			dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
 						       portid);
+			if (do_step3)
+				process_packet(pkts_burst[j], &dst_port[j]);
 			j++;
 			/* fallthrough */
 		case 1:
 			dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
 						       portid);
+			if (do_step3)
+				process_packet(pkts_burst[j], &dst_port[j]);
 		}
 	}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+		       struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+				  0);
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
  * from main_loop.
  */
 static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			  uint16_t portid, uint16_t *dst_port,
+			  struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t j;
-	uint16_t dst_port[MAX_PKT_BURST];
 	__m128i dip[MAX_PKT_BURST / FWDSTEP];
 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		processx4_step2(qconf, dip[j / FWDSTEP],
 				ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j],
 				&dst_port[j]);

+	if (do_step3)
+		for (j = 0; j != k; j += FWDSTEP)
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
 	/* Classify last up to 3 packets one by one */
 	switch (nb_rx % FWDSTEP) {
 	case 3:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 2:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 1:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 	}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+		       struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+				  0);
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
--
2.25.1
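For readers of the series, a hypothetical miniature of the calling pattern
this split enables (process_burst() stands in for the l3fwd
*_process_packets() helpers; it is not code from the patch). Poll mode
keeps step3 in its own send path, while event mode folds it into
processing; because do_step3 is a compile-time constant at each inlined
call site, the dead branch disappears.

	#include <stdint.h>

	static inline void
	process_burst(uint16_t *dst, int n, const uint8_t do_step3)
	{
		for (int i = 0; i < n; i++) {
			dst[i] = 0;	/* placeholder for the route lookup */
			if (do_step3) {
				/* MAC rewrite + TTL update would go here */
			}
		}
	}

	void
	poll_mode_path(uint16_t *dst, int n)
	{
		process_burst(dst, n, 0);	/* step3 done by send stage */
		/* send_packets_multi(...) follows in the real code */
	}

	void
	event_mode_path(uint16_t *dst, int n)
	{
		process_burst(dst, n, 1);	/* step3 folded in */
	}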
* [PATCH 3/5] examples/l3fwd: use lpm vector path for event vector
@ 2022-08-29  9:44 UTC
From: pbhagavatula
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
    Konstantin Ananyev
Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use lpm vector path to process event vector.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_altivec.h | 28 ++++++++++++++
 examples/l3fwd/l3fwd_event.h   | 58 ++++++++++++++++++++++++++++++++++
 examples/l3fwd/l3fwd_lpm.c     | 33 +++++++++----------
 examples/l3fwd/l3fwd_neon.h    | 43 +++++++++++++++++++++++
 examples/l3fwd/l3fwd_sse.h     | 44 ++++++++++++++++++++++++
 5 files changed, 190 insertions(+), 16 deletions(-)

diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..00a80225cd 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,32 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }

+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+
+	while (nb_elem > 7) {
+		__vector unsigned short dp = vec_splats((short)dst_ports[0]);
+		__vector unsigned short dp1;
+
+		dp1 = *((__vector unsigned short *)&dst_ports[i]);
+		res = vec_all_eq(dp1, dp);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..26c3254004 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -14,6 +14,14 @@

 #include "l3fwd.h"

+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#endif
+
 #define L3FWD_EVENT_SINGLE     0x1
 #define L3FWD_EVENT_BURST      0x2
 #define L3FWD_EVENT_TX_DIRECT  0x4
@@ -103,7 +111,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
 	}
 }

+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+		   uint16_t nb_pkts)
+{
+	uint16_t *des_pos, free = 0;
+	struct rte_mbuf **pos;
+	int i;
+
+	/* Filter out and free bad packets */
+	for (i = 0; i < nb_pkts; i++) {
+		if (dst_port[i] == BAD_PORT) {
+			rte_pktmbuf_free(mbufs[i]);
+			if (!free) {
+				pos = &mbufs[i];
+				des_pos = &dst_port[i];
+			}
+			free++;
+			continue;
+		}
+
+		if (free) {
+			*pos = mbufs[i];
+			pos++;
+			*des_pos = dst_port[i];
+			des_pos++;
+		}
+	}
+
+	return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+	uint16_t port, i;
+
+	vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+	/* Verify destination array */
+	port = process_dst_port(dst_port, vec->nb_elem);
+	if (port == BAD_PORT) {
+		vec->attr_valid = 0;
+		for (i = 0; i < vec->nb_elem; i++) {
+			vec->mbufs[i]->port = dst_port[i];
+			rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+		}
+	} else {
+		vec->attr_valid = 1;
+		vec->port = port;
+		vec->queue = 0;
+	}
+}

 struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
 void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index d1b850dd5b..3f67ab01d4 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,22 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
 }

 static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+			 uint16_t *dst_port)
 {
 	struct rte_mbuf **mbufs = vec->mbufs;
 	int i;

-	/* Process first packet to init vector attributes */
-	lpm_process_event_pkt(lconf, mbufs[0]);
 	if (vec->attr_valid) {
-		if (mbufs[0]->port != BAD_PORT)
-			vec->port = mbufs[0]->port;
-		else
-			vec->attr_valid = 0;
+		l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+					  dst_port, lconf, 1);
+	} else {
+		for (i = 0; i < vec->nb_elem; i++)
+			l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+						  &dst_port[i], lconf, 1);
 	}

-	for (i = 1; i < vec->nb_elem; i++) {
-		lpm_process_event_pkt(lconf, mbufs[i]);
-		event_vector_attr_validate(vec, mbufs[i]);
-	}
+	process_event_vector(vec, dst_port);
 }

 /* Same eventdev loop for single and burst of vector */
@@ -458,6 +456,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 	struct rte_event events[MAX_PKT_BURST];
 	int i, nb_enq = 0, nb_deq = 0;
 	struct lcore_conf *lconf;
+	uint16_t *dst_port_list;
 	unsigned int lcore_id;

 	if (event_p_id < 0)
@@ -465,7 +464,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,

 	lcore_id = rte_lcore_id();
 	lconf = &lcore_conf[lcore_id];
-
+	dst_port_list =
+		rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+			    RTE_CACHE_LINE_SIZE);
+	if (dst_port_list == NULL)
+		return;
 	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);

 	while (!force_quit) {
@@ -483,10 +486,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 				events[i].op = RTE_EVENT_OP_FORWARD;
 			}

-			lpm_process_event_vector(events[i].vec, lconf);
-
-			if (flags & L3FWD_EVENT_TX_DIRECT)
-				event_vector_txq_set(events[i].vec, 0);
+			lpm_process_event_vector(events[i].vec, lconf,
+						 dst_port_list);
 		}

 		if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..60e6a310e0 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,47 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }

+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+
+	while (nb_elem > 7) {
+		uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+		uint16x8_t dp1;
+
+		dp1 = vld1q_u16(&dst_ports[i]);
+		dp1 = vceqq_u16(dp1, dp);
+		res = vminvq_u16(dp1);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem > 3) {
+		uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+		uint16x4_t dp1;
+
+		dp1 = vld1_u16(&dst_ports[i]);
+		dp1 = vceq_u16(dp1, dp);
+		res = vminv_u16(dp1);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 4;
+		i += 4;
+	}
+
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }

+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+
+	while (nb_elem > 7) {
+		__m128i dp = _mm_set1_epi16(dst_ports[0]);
+		__m128i dp1;
+
+		dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+		dp1 = _mm_cmpeq_epi16(dp1, dp);
+		res = _mm_movemask_epi8(dp1);
+		if (res != 0xFFFF)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem > 3) {
+		__m128i dp = _mm_set1_epi16(dst_ports[0]);
+		__m128i dp1;
+
+		dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+		dp1 = _mm_cmpeq_epi16(dp1, dp);
+		dp1 = _mm_unpacklo_epi16(dp1, dp1);
+		res = _mm_movemask_ps((__m128)dp1);
+		if (res != 0xF)
+			return BAD_PORT;
+
+		nb_elem -= 4;
+		i += 4;
+	}
+
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_SSE_H_ */
--
2.25.1
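As a sanity reference for the process_dst_port() contract introduced
above: return the shared destination port when every element equals
dst_ports[0], otherwise BAD_PORT. The SSE/NEON/Altivec bodies are 8- and
4-lane unrollings of this scalar loop (BAD_PORT is assumed to be l3fwd's
usual all-ones sentinel for this sketch).

	#include <assert.h>
	#include <stdint.h>

	#define BAD_PORT 0xFFFF	/* assumption for this sketch */

	static uint16_t
	process_dst_port_scalar(const uint16_t *dst_ports, uint16_t nb_elem)
	{
		uint16_t i;

		for (i = 1; i < nb_elem; i++)
			if (dst_ports[i] != dst_ports[0])
				return BAD_PORT;
		return dst_ports[0];
	}

	int
	main(void)
	{
		uint16_t same[5] = {3, 3, 3, 3, 3};
		uint16_t mixed[5] = {3, 3, 4, 3, 3};

		assert(process_dst_port_scalar(same, 5) == 3);
		assert(process_dst_port_scalar(mixed, 5) == BAD_PORT);
		return 0;
	}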
* [PATCH 4/5] examples/l3fwd: use em vector path for event vector
@ 2022-08-29  9:44 UTC
From: pbhagavatula
To: jerinj; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use em vector path to process event vector.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_em.c            | 10 ++--
 examples/l3fwd/l3fwd_em_hlm.h        | 72 +++++-----------------------
 examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++----
 examples/l3fwd/l3fwd_event.h         | 21 --------
 4 files changed, 35 insertions(+), 93 deletions(-)

diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 10be24c61d..ac475073d7 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 	int i, nb_enq = 0, nb_deq = 0;
 	struct lcore_conf *lconf;
 	unsigned int lcore_id;
+	uint16_t *dst_ports;

 	if (event_p_id < 0)
 		return;

+	dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+				RTE_CACHE_LINE_SIZE);
+	if (dst_ports == NULL)
+		return;
 	lcore_id = rte_lcore_id();
 	lconf = &lcore_conf[lcore_id];

@@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 			}

 #if defined RTE_ARCH_X86 || defined __ARM_NEON
-			l3fwd_em_process_event_vector(events[i].vec, lconf);
+			l3fwd_em_process_event_vector(events[i].vec, lconf,
+						      dst_ports);
 #else
 			l3fwd_em_no_opt_process_event_vector(events[i].vec,
 							     lconf);
 #endif
-			if (flags & L3FWD_EVENT_TX_DIRECT)
-				event_vector_txq_set(events[i].vec, 0);
 		}

 		if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 12b997e477..2e11eefad7 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,

 static inline void
 l3fwd_em_process_event_vector(struct rte_event_vector *vec,
-			      struct lcore_conf *qconf)
+			      struct lcore_conf *qconf, uint16_t *dst_port)
 {
-	struct rte_mbuf **mbufs = vec->mbufs;
-	uint16_t dst_port[MAX_PKT_BURST];
-	int32_t i, j, n, pos;
-
-	for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++)
-		rte_prefetch0(
-			rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1);
+	uint16_t i;

 	if (vec->attr_valid)
-		vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
-	n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT);
-	for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
-		uint32_t pkt_type =
-			RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
-		uint32_t l3_type, tcp_or_udp;
-
-		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
-			pkt_type &= mbufs[j + i]->packet_type;
-
-		l3_type = pkt_type & RTE_PTYPE_L3_MASK;
-		tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-
-		for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
-		     i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem;
-		     i++, pos++) {
-			rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos],
-						       struct rte_ether_hdr *) +
-				      1);
-		}
-
-		if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
-			em_get_dst_port_ipv4xN_events(qconf, &mbufs[j],
-						      &dst_port[j]);
-		} else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
-			em_get_dst_port_ipv6xN_events(qconf, &mbufs[j],
-						      &dst_port[j]);
-		} else {
-			for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
-				mbufs[j + i]->port =
-					em_get_dst_port(qconf, mbufs[j + i],
-							mbufs[j + i]->port);
-				process_packet(mbufs[j + i],
-					       &mbufs[j + i]->port);
-				event_vector_attr_validate(vec, mbufs[j + i]);
-			}
-			continue;
-		}
-		processx4_step3(&mbufs[j], &dst_port[j]);
-
-		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
-			mbufs[j + i]->port = dst_port[j + i];
-			event_vector_attr_validate(vec, mbufs[j + i]);
-		}
-	}
-
-	for (; j < vec->nb_elem; j++) {
-		mbufs[j]->port =
-			em_get_dst_port(qconf, mbufs[j], mbufs[j]->port);
-		process_packet(mbufs[j], &mbufs[j]->port);
-		event_vector_attr_validate(vec, mbufs[j]);
-	}
+		l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port,
+					 vec->port, qconf, 1);
+	else
+		for (i = 0; i < vec->nb_elem; i++)
+			l3fwd_em_process_packets(1, &vec->mbufs[i],
+						 &dst_port[i],
+						 vec->mbufs[i]->port, qconf, 1);
+
+	process_event_vector(vec, dst_port);
 }

 #endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index d2f75edb8a..067f23889a 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events,

 	for (i = 1, j = 0; j < nb_rx; i++, j++) {
 		struct rte_mbuf *mbuf = events[j]->mbuf;
+		uint16_t port;

 		if (i < nb_rx) {
 			rte_prefetch0(rte_pktmbuf_mtod(
 					events[i]->mbuf,
 					struct rte_ether_hdr *) + 1);
 		}
+		port = mbuf->port;
 		mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
 		process_packet(mbuf, &mbuf->port);
+		if (mbuf->port == BAD_PORT)
+			mbuf->port = port;
 	}
 }

 static inline void
 l3fwd_em_process_event_vector(struct rte_event_vector *vec,
-			      struct lcore_conf *qconf)
+			      struct lcore_conf *qconf, uint16_t *dst_ports)
 {
+	const uint8_t attr_valid = vec->attr_valid;
 	struct rte_mbuf **mbufs = vec->mbufs;
 	int32_t i, j;

 	rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);

-	if (vec->attr_valid)
-		vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port);
-
 	for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
 		if (j < vec->nb_elem)
 			rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
 						       struct rte_ether_hdr *) +
 				      1);
-		mbufs[i]->port =
-			em_get_dst_port(qconf, mbufs[i], mbufs[i]->port);
-		process_packet(mbufs[i], &mbufs[i]->port);
-		event_vector_attr_validate(vec, mbufs[i]);
+		dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+					       attr_valid ? vec->port :
+							    mbufs[i]->port);
 	}
+	j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+	for (i = 0; i != j; i += FWDSTEP)
+		processx4_step3(&vec->mbufs[i], &dst_ports[i]);
+	for (; i < vec->nb_elem; i++)
+		process_packet(vec->mbufs[i], &dst_ports[i]);
+
+	process_event_vector(vec, dst_ports);
 }

 #endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index 26c3254004..2844cc4dd6 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -90,27 +90,6 @@ struct l3fwd_event_resources {
 	uint64_t vector_tmo_ns;
 };

-static inline void
-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
-{
-	/* l3fwd application only changes mbuf port while processing */
-	if (vec->attr_valid && (vec->port != mbuf->port))
-		vec->attr_valid = 0;
-}
-
-static inline void
-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
-{
-	if (vec->attr_valid) {
-		vec->queue = txq;
-	} else {
-		int i;
-
-		for (i = 0; i < vec->nb_elem; i++)
-			rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq);
-	}
-}
-
 static inline uint16_t
 filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
 		   uint16_t nb_pkts)
--
2.25.1
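The filter_bad_packets() helper this patch now relies on (added in 3/5) is
an in-place stable compaction: entries with BAD_PORT are freed and the
survivors slide down, keeping mbufs[] and dst_port[] in step. A
self-contained sketch of just that idea, with plain ints standing in for
the mbuf pointers:

	#include <stdint.h>
	#include <stdio.h>

	#define BAD_PORT 0xFFFF	/* assumption for this sketch */

	static uint16_t
	compact(int *pkts, uint16_t *port, uint16_t n)
	{
		uint16_t r, w = 0;	/* read and write cursors */

		for (r = 0; r < n; r++) {
			if (port[r] == BAD_PORT)
				continue;	/* real code frees the mbuf */
			pkts[w] = pkts[r];
			port[w] = port[r];
			w++;
		}
		return w;	/* new element count */
	}

	int
	main(void)
	{
		int pkts[4] = {10, 11, 12, 13};
		uint16_t port[4] = {1, BAD_PORT, 1, 1};

		printf("%u packets left\n", compact(pkts, port, 4)); /* 3 */
		return 0;
	}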
* [PATCH 5/5] examples/l3fwd: fix event vector processing in fib
@ 2022-08-29  9:44 UTC
From: pbhagavatula
To: jerinj; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Fix stack overflow when event vector size is greater than
MAX_BURST_SIZE.
Add missing mac swap and rfc1812 stage.

Fixes: e8adca1951d4 ("examples/l3fwd: support event vector")

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_fib.c | 85 +++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 23 deletions(-)

diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c
index e02e4b3f5a..80f0330c69 100644
--- a/examples/l3fwd/l3fwd_fib.c
+++ b/examples/l3fwd/l3fwd_fib.c
@@ -261,7 +261,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
 	uint32_t ipv4_arr[MAX_PKT_BURST];
 	uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
 	uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
-	uint16_t nh;
+	uint16_t nh, hops[MAX_PKT_BURST];
 	uint8_t type_arr[MAX_PKT_BURST];
 	uint32_t ipv4_cnt, ipv6_cnt;
 	uint32_t ipv4_arr_assem, ipv6_arr_assem;
@@ -350,7 +350,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
 			else
 				nh = (uint16_t)hopsv6[ipv6_arr_assem++];
 			if (nh != FIB_DEFAULT_HOP)
-				events[i].mbuf->port = nh;
+				hops[i] = nh != FIB_DEFAULT_HOP ?
+						  nh :
+						  events[i].mbuf->port;
+			process_packet(events[i].mbuf, &hops[i]);
+			events[i].mbuf->port = hops[i] != BAD_PORT ?
+						       hops[i] :
+						       events[i].mbuf->port;
 		}

 		if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -418,14 +424,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
 }

 static __rte_always_inline void
-fib_process_event_vector(struct rte_event_vector *vec)
+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
+			 uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6,
+			 uint32_t *ipv4_arr, uint16_t *hops)
 {
-	uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
-	uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
 	uint32_t ipv4_arr_assem, ipv6_arr_assem;
 	struct rte_mbuf **mbufs = vec->mbufs;
-	uint32_t ipv4_arr[MAX_PKT_BURST];
-	uint8_t type_arr[MAX_PKT_BURST];
 	uint32_t ipv4_cnt, ipv6_cnt;
 	struct lcore_conf *lconf;
 	uint16_t nh;
@@ -463,16 +467,10 @@ fib_process_event_vector(struct rte_event_vector *vec)

 	/* Lookup IPv6 hops if IPv6 packets are present. */
 	if (ipv6_cnt > 0)
-		rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr,
-				     hopsv6, ipv6_cnt);
-
-	if (vec->attr_valid) {
-		nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0];
-		if (nh != FIB_DEFAULT_HOP)
-			vec->port = nh;
-		else
-			vec->attr_valid = 0;
-	}
+		rte_fib6_lookup_bulk(
+			lconf->ipv6_lookup_struct,
+			(uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6,
+			ipv6_cnt);

 	/* Assign ports looked up in fib depending on IPv4 or IPv6 */
 	for (i = 0; i < vec->nb_elem; i++) {
@@ -481,9 +479,26 @@ fib_process_event_vector(struct rte_event_vector *vec)
 		else
 			nh = (uint16_t)hopsv6[ipv6_arr_assem++];
 		if (nh != FIB_DEFAULT_HOP)
-			mbufs[i]->port = nh;
-		event_vector_attr_validate(vec, mbufs[i]);
+			hops[i] = nh;
+		else
+			hops[i] = vec->attr_valid ? vec->port :
+						    vec->mbufs[i]->port;
 	}
+
+#if defined FIB_SEND_MULTI
+	uint16_t k;
+	k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+	for (i = 0; i != k; i += FWDSTEP)
+		processx4_step3(&vec->mbufs[i], &hops[i]);
+	for (; i < vec->nb_elem; i++)
+		process_packet(vec->mbufs[i], &hops[i]);
+#else
+	for (i = 0; i < vec->nb_elem; i++)
+		process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+	process_event_vector(vec, hops);
 }

 static __rte_always_inline void
@@ -496,7 +511,32 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 	const uint8_t event_d_id = evt_rsrc->event_d_id;
 	const uint16_t deq_len = evt_rsrc->deq_depth;
 	struct rte_event events[MAX_PKT_BURST];
+	uint8_t *type_arr, **ipv6_arr, *ptr;
 	int nb_enq = 0, nb_deq = 0, i;
+	uint64_t *hopsv4, *hopsv6;
+	uint32_t *ipv4_arr;
+	uint16_t *hops;
+	uintptr_t mem;
+
+	mem = (uintptr_t)rte_zmalloc(
+		"vector_fib",
+		(sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) +
+		 sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) +
+		 (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) *
+			evt_rsrc->vector_size,
+		RTE_CACHE_LINE_SIZE);
+	if (mem == 0)
+		return;
+	ipv4_arr = (uint32_t *)mem;
+	type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size];
+	hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size];
+	hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size];
+	hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size];
+	ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size];
+
+	ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size];
+	for (i = 0; i < evt_rsrc->vector_size; i++)
+		ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i];

 	if (event_p_id < 0)
 		return;
@@ -519,10 +559,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 				events[i].op = RTE_EVENT_OP_FORWARD;
 			}

-			fib_process_event_vector(events[i].vec);
-
-			if (flags & L3FWD_EVENT_TX_DIRECT)
-				event_vector_txq_set(events[i].vec, 0);
+			fib_process_event_vector(events[i].vec, type_arr,
+						 ipv6_arr, hopsv4, hopsv6,
+						 ipv4_arr, hops);
 		}

 		if (flags & L3FWD_EVENT_TX_ENQ) {
--
2.25.1
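Two things are going on in this fix, worth spelling out. First, the old
code kept MAX_PKT_BURST-sized arrays on the stack while a vector can
carry evt_rsrc->vector_size (possibly larger) packets, hence the
overflow; the per-packet arrays now come from the heap, sized by
vector_size. Second, every FIB result is normalized before the send
stage, roughly like the sketch below (FIB_DEFAULT_HOP mirroring the miss
sentinel l3fwd_fib.c uses; treat the value as an assumption):

	#include <stdint.h>

	#define FIB_DEFAULT_HOP 999	/* assumed sentinel for a miss */

	/* A FIB miss falls back to the packet's own port, so the hops[]
	 * array handed to process_event_vector() never carries the
	 * sentinel. */
	static uint16_t
	resolve_hop(uint64_t fib_result, uint16_t pkt_port)
	{
		uint16_t nh = (uint16_t)fib_result;

		return nh != FIB_DEFAULT_HOP ? nh : pkt_port;
	}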
* [PATCH v2 1/5] examples/l3fwd: fix port group mask generation
@ 2022-09-02  9:18 UTC
From: pbhagavatula
To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Fix port group mask generation in altivec: vec_any_eq returns 0 or 1,
while port_groupx4 expects the comparison mask as its result.

Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc")
Cc: stable@dpdk.org

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v2 Changes:
- Fix PPC, RISC-V, aarch32 compilation.

 examples/common/altivec/port_group.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h
index 5e209b02fa..592ef80b7f 100644
--- a/examples/common/altivec/port_group.h
+++ b/examples/common/altivec/port_group.h
@@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
 		uint16_t u16[FWDSTEP + 1];
 		uint64_t u64;
 	} *pnum = (void *)pn;
+	union u_vec {
+		__vector unsigned short v_us;
+		unsigned short s[8];
+	};
+	union u_vec res;
 	int32_t v;

-	v = vec_any_eq(dp1, dp2);
-
+	dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2);
+	res.v_us = dp1;
+	v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) |
+	    (res.s[3] & 0x8);

 	/* update last port counter. */
 	lp[0] += gptbl[v].lpv;
--
2.25.1
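The one functional delta from v1 is the explicit cast on the vec_cmpeq()
result. On AltiVec the intrinsic yields a vector bool type, and some
toolchains reject a direct assignment to __vector unsigned short, which
is presumably the PPC part of the compilation fixes noted in the v2
changelog. A minimal illustration of the accepted form:

	#include <altivec.h>

	static __vector unsigned short
	cmp_mask(__vector unsigned short a, __vector unsigned short b)
	{
		/* explicit conversion from the vector bool result */
		return (__vector unsigned short)vec_cmpeq(a, b);
	}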
* [PATCH v2 2/5] examples/l3fwd: split processing and send stages
@ 2022-09-02  9:18 UTC
From: pbhagavatula
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
    Konstantin Ananyev
Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Split the packet processing stage from the packet send stage, as the
send stage is not common to poll and event mode.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_em_hlm.h      | 39 +++++++++++++++++++-----------
 examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++---
 examples/l3fwd/l3fwd_lpm_neon.h    | 35 ++++++++++++++++++++-------
 examples/l3fwd/l3fwd_lpm_sse.h     | 25 ++++++++++++++++---
 4 files changed, 95 insertions(+), 29 deletions(-)

diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index e76f2760b0..12b997e477 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
 	return portid;
 }

-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
 static inline void
-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			 uint16_t *dst_port, uint16_t portid,
+			 struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t i, j, pos;
-	uint16_t dst_port[MAX_PKT_BURST];

 	/*
 	 * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 			dst_port[j + i] = em_get_dst_port(qconf,
 					pkts_burst[j + i], portid);
 		}
+
+		for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP)
+			processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);
 	}

-	for (; j < nb_rx; j++)
+	for (; j < nb_rx; j++) {
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &pkts_burst[j]->port);
+	}
+}

-	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+		      struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0);
+	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }

 /*
@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
 	 */
 	int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);

-	for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+	for (j = 0; j < nb_rx; j++)
 		pkts_burst[j] = ev[j]->mbuf;
-		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
-					       struct rte_ether_hdr *) + 1);
-	}

 	for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {

@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
 			}
 			continue;
 		}
-		processx4_step3(&pkts_burst[j], &dst_port[j]);
+		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP)
+			processx4_step3(&pkts_burst[j + i], &dst_port[j + i]);

 		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
 			pkts_burst[j + i]->port = dst_port[j + i];
diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h
index 0c6852a7bb..adb82f1478 100644
--- a/examples/l3fwd/l3fwd_lpm_altivec.h
+++ b/examples/l3fwd/l3fwd_lpm_altivec.h
@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf,
  * from main_loop.
  */
 static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint8_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			  uint8_t portid, uint16_t *dst_port,
+			  struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t j;
-	uint16_t dst_port[MAX_PKT_BURST];
 	__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 				ipv4_flag[j / FWDSTEP],
 				portid, &pkts_burst[j], &dst_port[j]);

+	if (do_step3)
+		for (j = 0; j != k; j += FWDSTEP)
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
 	/* Classify last up to 3 packets one by one */
 	switch (nb_rx % FWDSTEP) {
 	case 3:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 2:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 1:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
+		       struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+				  0);
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index 78ee83b76c..2a68c4c15e 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf,
 	}
 }

-/*
- * Buffer optimized handling of packets, invoked
- * from main_loop.
- */
 static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			  uint16_t portid, uint16_t *dst_port,
+			  struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t i = 0, j = 0;
-	uint16_t dst_port[MAX_PKT_BURST];
 	int32x4_t dip;
 	uint32_t ipv4_flag;
 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 			rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *));
 		}

-
 		for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
 			for (i = 0; i < FWDSTEP; i++) {
 				rte_prefetch0(rte_pktmbuf_mtod(
@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 			processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
 			processx4_step2(qconf, dip, ipv4_flag, portid,
 					&pkts_burst[j], &dst_port[j]);
+			if (do_step3)
+				processx4_step3(&pkts_burst[j], &dst_port[j]);
 		}

 		processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
 		processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
 				&dst_port[j]);
+		if (do_step3)
+			processx4_step3(&pkts_burst[j], &dst_port[j]);

 		j += FWDSTEP;
 	}
@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 						       void *));
 			j++;
 		}
-
 		j -= m;
 		/* Classify last up to 3 packets one by one */
 		switch (m) {
 		case 3:
 			dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
 						       portid);
+			if (do_step3)
+				process_packet(pkts_burst[j], &dst_port[j]);
 			j++;
 			/* fallthrough */
 		case 2:
 			dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
 						       portid);
+			if (do_step3)
+				process_packet(pkts_burst[j], &dst_port[j]);
 			j++;
 			/* fallthrough */
 		case 1:
 			dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],
 						       portid);
+			if (do_step3)
+				process_packet(pkts_burst[j], &dst_port[j]);
 		}
 	}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+		       struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+				  0);
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
index 3f637a23d1..db15030320 100644
--- a/examples/l3fwd/l3fwd_lpm_sse.h
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf,
  * from main_loop.
  */
 static inline void
-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
-		uint16_t portid, struct lcore_conf *qconf)
+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			  uint16_t portid, uint16_t *dst_port,
+			  struct lcore_conf *qconf, const uint8_t do_step3)
 {
 	int32_t j;
-	uint16_t dst_port[MAX_PKT_BURST];
 	__m128i dip[MAX_PKT_BURST / FWDSTEP];
 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		processx4_step2(qconf, dip[j / FWDSTEP],
 				ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j],
 				&dst_port[j]);

+	if (do_step3)
+		for (j = 0; j != k; j += FWDSTEP)
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
 	/* Classify last up to 3 packets one by one */
 	switch (nb_rx % FWDSTEP) {
 	case 3:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 2:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 		/* fall-through */
 	case 1:
 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		if (do_step3)
+			process_packet(pkts_burst[j], &dst_port[j]);
 		j++;
 	}
+}
+
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid,
+		       struct lcore_conf *qconf)
+{
+	uint16_t dst_port[MAX_PKT_BURST];
+
+	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
+				  0);
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
--
2.25.1
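An aside on the const uint8_t do_step3 parameter, unchanged from v1:
since the helpers are static inline and each call site passes a literal 0
or 1, the constant reaches the inlined body and the compiler can drop the
untaken branches, so the shared routine costs the specialized paths
nothing. A hypothetical miniature of the pattern:

	#include <stdint.h>

	static inline int
	step(int x, const uint8_t do_extra)
	{
		if (do_extra)	/* folded away when called as step(x, 0) */
			x += 1;
		return x * 2;
	}

	int fast_path(int x)  { return step(x, 0); } /* becomes x * 2 */
	int extra_path(int x) { return step(x, 1); } /* becomes (x + 1) * 2 */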
* [PATCH v2 3/5] examples/l3fwd: use lpm vector path for event vector
@ 2022-09-02  9:18 UTC
From: pbhagavatula
To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson,
    Konstantin Ananyev
Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use lpm vector path to process event vector.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
 examples/l3fwd/l3fwd_event.h   | 71 ++++++++++++++++++++++++++++++++++
 examples/l3fwd/l3fwd_lpm.c     | 38 ++++++++++--------
 examples/l3fwd/l3fwd_neon.h    | 45 +++++++++++++++++++++
 examples/l3fwd/l3fwd_sse.h     | 44 +++++++++++++++++++++
 5 files changed, 211 insertions(+), 16 deletions(-)

diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
index 87018f5dbe..e45e138e59 100644
--- a/examples/l3fwd/l3fwd_altivec.h
+++ b/examples/l3fwd/l3fwd_altivec.h
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }

+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+
+	while (nb_elem > 7) {
+		__vector unsigned short dp1;
+		__vector unsigned short dp;
+
+		dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+		dp1 = *((__vector unsigned short *)&dst_ports[i]);
+		res = vec_all_eq(dp1, dp);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_ALTIVEC_H_ */
diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
index b93841a16f..3fe38aada0 100644
--- a/examples/l3fwd/l3fwd_event.h
+++ b/examples/l3fwd/l3fwd_event.h
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
 	uint64_t vector_tmo_ns;
 };

+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	int i;
+
+	for (i = 0; i < nb_elem; i++) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+	}
+
+	return dst_ports[0];
+}
+#endif
+
 static inline void
 event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
 {
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
 	}
 }

+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+		   uint16_t nb_pkts)
+{
+	uint16_t *des_pos, free = 0;
+	struct rte_mbuf **pos;
+	int i;
+
+	/* Filter out and free bad packets */
+	for (i = 0; i < nb_pkts; i++) {
+		if (dst_port[i] == BAD_PORT) {
+			rte_pktmbuf_free(mbufs[i]);
+			if (!free) {
+				pos = &mbufs[i];
+				des_pos = &dst_port[i];
+			}
+			free++;
+			continue;
+		}
+
+		if (free) {
+			*pos = mbufs[i];
+			pos++;
+			*des_pos = dst_port[i];
+			des_pos++;
+		}
+	}
+	return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+	uint16_t port, i;
+
+	vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+	/* Verify destination array */
+	port = process_dst_port(dst_port, vec->nb_elem);
+	if (port == BAD_PORT) {
+		vec->attr_valid = 0;
+		for (i = 0; i < vec->nb_elem; i++) {
+			vec->mbufs[i]->port = dst_port[i];
+			rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+		}
+	} else {
+		vec->attr_valid = 1;
+		vec->port = port;
+		vec->queue = 0;
+	}
+}

 struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
 void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index d1b850dd5b..1652b7c470 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
 }

 static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+			 uint16_t *dst_port)
 {
 	struct rte_mbuf **mbufs = vec->mbufs;
 	int i;

-	/* Process first packet to init vector attributes */
-	lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
 	if (vec->attr_valid) {
-		if (mbufs[0]->port != BAD_PORT)
-			vec->port = mbufs[0]->port;
-		else
-			vec->attr_valid = 0;
+		l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+					  dst_port, lconf, 1);
+	} else {
+		for (i = 0; i < vec->nb_elem; i++)
+			l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+						  &dst_port[i], lconf, 1);
 	}
+#else
+	for (i = 0; i < vec->nb_elem; i++)
+		dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif

-	for (i = 1; i < vec->nb_elem; i++) {
-		lpm_process_event_pkt(lconf, mbufs[i]);
-		event_vector_attr_validate(vec, mbufs[i]);
-	}
+	process_event_vector(vec, dst_port);
 }

 /* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 	struct rte_event events[MAX_PKT_BURST];
 	int i, nb_enq = 0, nb_deq = 0;
 	struct lcore_conf *lconf;
+	uint16_t *dst_port_list;
 	unsigned int lcore_id;

 	if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,

 	lcore_id = rte_lcore_id();
 	lconf = &lcore_conf[lcore_id];
-
+	dst_port_list =
+		rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+			    RTE_CACHE_LINE_SIZE);
+	if (dst_port_list == NULL)
+		return;
 	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);

 	while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
 				events[i].op = RTE_EVENT_OP_FORWARD;
 			}

-			lpm_process_event_vector(events[i].vec, lconf);
-
-			if (flags & L3FWD_EVENT_TX_DIRECT)
-				event_vector_txq_set(events[i].vec, 0);
+			lpm_process_event_vector(events[i].vec, lconf,
+						 dst_port_list);
 		}

 		if (flags & L3FWD_EVENT_TX_ENQ) {
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index ce515e0bc4..4d98288707 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -194,4 +194,49 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }

+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+
+#if defined(RTE_ARCH_ARM64)
+	while (nb_elem > 7) {
+		uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+		uint16x8_t dp1;
+
+		dp1 = vld1q_u16(&dst_ports[i]);
+		dp1 = vceqq_u16(dp1, dp);
+		res = vminvq_u16(dp1);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem > 3) {
+		uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+		uint16x4_t dp1;
+
+		dp1 = vld1_u16(&dst_ports[i]);
+		dp1 = vceq_u16(dp1, dp);
+		res = vminv_u16(dp1);
+		if (!res)
+			return BAD_PORT;
+
+		nb_elem -= 4;
+		i += 4;
+	}
+#endif
+
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_NEON_H_ */
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index 0f0d0323a2..083729cdef 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
 	}
 }

+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+	uint16_t i = 0, res;
+
+	while (nb_elem > 7) {
+		__m128i dp = _mm_set1_epi16(dst_ports[0]);
+		__m128i dp1;
+
+		dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+		dp1 = _mm_cmpeq_epi16(dp1, dp);
+		res = _mm_movemask_epi8(dp1);
+		if (res != 0xFFFF)
+			return BAD_PORT;
+
+		nb_elem -= 8;
+		i += 8;
+	}
+
+	while (nb_elem > 3) {
+		__m128i dp = _mm_set1_epi16(dst_ports[0]);
+		__m128i dp1;
+
+		dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+		dp1 = _mm_cmpeq_epi16(dp1, dp);
+		dp1 = _mm_unpacklo_epi16(dp1, dp1);
+		res = _mm_movemask_ps((__m128)dp1);
+		if (res != 0xF)
+			return BAD_PORT;
+
+		nb_elem -= 4;
+		i += 4;
+	}
+
+	while (nb_elem) {
+		if (dst_ports[i] != dst_ports[0])
+			return BAD_PORT;
+		nb_elem--;
+		i++;
+	}
+
+	return dst_ports[0];
+}
+
 #endif /* _L3FWD_SSE_H_ */
--
2.25.1
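A note on the NEON variant above, which is why v2 fences it with
RTE_ARCH_ARM64: after vceqq_u16() every lane holds 0xFFFF (equal) or
0x0000 (different), and the horizontal minimum vminvq_u16() (an
aarch64-only intrinsic) is zero exactly when some lane differed. A
standalone sketch of the all-equal test:

	#include <arm_neon.h>
	#include <stdint.h>

	/* Nonzero iff all eight lanes of p[] equal ref. */
	static int
	all_eq8(const uint16_t *p, uint16_t ref)
	{
		uint16x8_t eq = vceqq_u16(vld1q_u16(p), vdupq_n_u16(ref));

		return vminvq_u16(eq) != 0;
	}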
* [PATCH v2 4/5] examples/l3fwd: fix event vector processing in fib 2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 2022-09-02 9:18 ` [PATCH v2 2/5] examples/l3fwd: split processing and send stages pbhagavatula 2022-09-02 9:18 ` [PATCH v2 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula @ 2022-09-02 9:18 ` pbhagavatula 2022-09-02 9:18 ` [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula ` (2 subsequent siblings) 5 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix stack overflow when event vector size is greater than MAX_BURST_SIZE. Add missing mac swap and rfc1812 stage. Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_fib.c | 124 ++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 37 deletions(-) diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c index e02e4b3f5a..ada5d0d430 100644 --- a/examples/l3fwd/l3fwd_fib.c +++ b/examples/l3fwd/l3fwd_fib.c @@ -77,27 +77,38 @@ fib_parse_packet(struct rte_mbuf *mbuf, */ #if !defined FIB_SEND_MULTI static inline void -fib_send_single(int nb_tx, struct lcore_conf *qconf, - struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +process_packet(struct rte_mbuf *pkt, uint16_t *hop) { - int32_t j; struct rte_ether_hdr *eth_hdr; - for (j = 0; j < nb_tx; j++) { - /* Run rfc1812 if packet is ipv4 and checks enabled. */ + /* Run rfc1812 if packet is ipv4 and checks enabled. */ #if defined DO_RFC_1812_CHECKS - rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( - pkts_burst[j], struct rte_ether_hdr *) + 1), - &hops[j], pkts_burst[j]->packet_type); + rfc1812_process( + (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( + pkt, struct rte_ether_hdr *) + + 1), + hop, pkt->packet_type, + pkt->ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK); #endif - /* Set MAC addresses. */ - eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *); - *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; - rte_ether_addr_copy(&ports_eth_addr[hops[j]], - ð_hdr->src_addr); + /* Set MAC addresses. */ + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; + rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); +} + +static inline void +fib_send_single(int nb_tx, struct lcore_conf *qconf, + struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +{ + int32_t j; + for (j = 0; j < nb_tx; j++) { + process_packet(pkts_burst[j], &hops[j]); + if (hops[j] == BAD_PORT) { + rte_pktmbuf_free(pkts_burst[j]); + continue; + } /* Send single packet. */ send_single_packet(qconf, pkts_burst[j], hops[j]); } @@ -261,7 +272,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, uint32_t ipv4_arr[MAX_PKT_BURST]; uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; - uint16_t nh; + uint16_t nh, hops[MAX_PKT_BURST]; uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; uint32_t ipv4_arr_assem, ipv6_arr_assem; @@ -350,7 +361,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - events[i].mbuf->port = nh; + hops[i] = nh != FIB_DEFAULT_HOP ? 
+ nh : + events[i].mbuf->port; + process_packet(events[i].mbuf, &hops[i]); + events[i].mbuf->port = hops[i] != BAD_PORT ? + hops[i] : + events[i].mbuf->port; } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -418,14 +435,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -fib_process_event_vector(struct rte_event_vector *vec) +fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, + uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, + uint32_t *ipv4_arr, uint16_t *hops) { - uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; - uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; uint32_t ipv4_arr_assem, ipv6_arr_assem; struct rte_mbuf **mbufs = vec->mbufs; - uint32_t ipv4_arr[MAX_PKT_BURST]; - uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; struct lcore_conf *lconf; uint16_t nh; @@ -463,16 +478,10 @@ fib_process_event_vector(struct rte_event_vector *vec) /* Lookup IPv6 hops if IPv6 packets are present. */ if (ipv6_cnt > 0) - rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, - hopsv6, ipv6_cnt); - - if (vec->attr_valid) { - nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; - if (nh != FIB_DEFAULT_HOP) - vec->port = nh; - else - vec->attr_valid = 0; - } + rte_fib6_lookup_bulk( + lconf->ipv6_lookup_struct, + (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, + ipv6_cnt); /* Assign ports looked up in fib depending on IPv4 or IPv6 */ for (i = 0; i < vec->nb_elem; i++) { @@ -481,9 +490,26 @@ fib_process_event_vector(struct rte_event_vector *vec) else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - mbufs[i]->port = nh; - event_vector_attr_validate(vec, mbufs[i]); + hops[i] = nh; + else + hops[i] = vec->attr_valid ? vec->port : + vec->mbufs[i]->port; } + +#if defined FIB_SEND_MULTI + uint16_t k; + k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != k; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &hops[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#else + for (i = 0; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#endif + + process_event_vector(vec, hops); } static __rte_always_inline void @@ -496,7 +522,32 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, const uint8_t event_d_id = evt_rsrc->event_d_id; const uint16_t deq_len = evt_rsrc->deq_depth; struct rte_event events[MAX_PKT_BURST]; + uint8_t *type_arr, **ipv6_arr, *ptr; int nb_enq = 0, nb_deq = 0, i; + uint64_t *hopsv4, *hopsv6; + uint32_t *ipv4_arr; + uint16_t *hops; + uintptr_t mem; + + mem = (uintptr_t)rte_zmalloc( + "vector_fib", + (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + + sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + + (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * + evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (mem == 0) + return; + ipv4_arr = (uint32_t *)mem; + type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; + hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; + hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; + hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; + ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; + + ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; + for (i = 0; i < evt_rsrc->vector_size; i++) + ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; if (event_p_id < 0) return; @@ -519,10 +570,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - fib_process_event_vector(events[i].vec); - - if (flags & 
L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + fib_process_event_vector(events[i].vec, type_arr, + ipv6_arr, hopsv4, hopsv6, + ipv4_arr, hops); } if (flags & L3FWD_EVENT_TX_ENQ) { -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
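The overflow fixed here is a sizing mismatch: the fib scratch arrays (type_arr, ipv4_arr, ipv6_arr, hopsv4, hopsv6) were MAX_PKT_BURST-sized stack locals, but an event vector can carry up to evt_rsrc->vector_size mbufs. A minimal standalone sketch of the failure mode (MAX_PKT_BURST value taken from examples/l3fwd/l3fwd.h; a guard is added so the sketch itself stays well-defined):

#include <stdint.h>
#include <stdio.h>

#define MAX_PKT_BURST 32	/* value from examples/l3fwd/l3fwd.h */

int main(void)
{
	const uint16_t vector_size = 64; /* runtime-configurable, may exceed 32 */
	uint64_t hopsv4[MAX_PKT_BURST];	 /* the old stack-sized scratch array */
	uint16_t i;

	for (i = 0; i < vector_size; i++) {
		if (i >= MAX_PKT_BURST) {
			/* the pre-fix loop would write past hopsv4[] here */
			printf("index %d is past the end of hopsv4[%d]\n",
			       i, MAX_PKT_BURST);
			break;
		}
		hopsv4[i] = 0;
	}
	return (int)hopsv4[0]; /* use the array; always 0 here */
}

Sizing the scratch area by evt_rsrc->vector_size, as the rte_zmalloc() in this patch does, removes the mismatch for any configured vector size.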
* [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector 2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (2 preceding siblings ...) 2022-09-02 9:18 ` [PATCH v2 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-09-02 9:18 ` pbhagavatula 2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula 5 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-09-02 9:18 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use em vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em.c | 12 +++-- examples/l3fwd/l3fwd_em.h | 29 +++++------ examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- examples/l3fwd/l3fwd_event.h | 21 -------- 5 files changed, 47 insertions(+), 112 deletions(-) diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 10be24c61d..e7b35cfbd9 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; unsigned int lcore_id; + uint16_t *dst_ports; if (event_p_id < 0) return; + dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_ports == NULL) + return; lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; @@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, } #if defined RTE_ARCH_X86 || defined __ARM_NEON - l3fwd_em_process_event_vector(events[i].vec, lconf); + l3fwd_em_process_event_vector(events[i].vec, lconf, + dst_ports); #else l3fwd_em_no_opt_process_event_vector(events[i].vec, - lconf); + lconf, dst_ports); #endif - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); } if (flags & L3FWD_EVENT_TX_ENQ) { diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h index fe2ee59f6a..7d051fc076 100644 --- a/examples/l3fwd/l3fwd_em.h +++ b/examples/l3fwd/l3fwd_em.h @@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid, } } -static __rte_always_inline void +static __rte_always_inline uint16_t l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) { struct rte_ether_hdr *eth_hdr; @@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); else m->port = BAD_PORT; + + return m->port; } /* @@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events, static inline void l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, + uint16_t *dst_ports) { struct rte_mbuf **mbufs = vec->mbufs; int32_t i; @@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); - /* Process first packet to init vector attributes */ - l3fwd_em_simple_process(mbufs[0], qconf); - if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; - } - /* * Prefetch and forward already prefetched packets. 
*/ - for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { + for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { rte_prefetch0( rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *)); - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); } /* Forward remaining prefetched packets */ - for (; i < vec->nb_elem; i++) { - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); - } + for (; i < vec->nb_elem; i++) + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 12b997e477..2e11eefad7 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_port) { - struct rte_mbuf **mbufs = vec->mbufs; - uint16_t dst_port[MAX_PKT_BURST]; - int32_t i, j, n, pos; - - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) - rte_prefetch0( - rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); + uint16_t i; if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - - n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { - uint32_t pkt_type = - RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP; - uint32_t l3_type, tcp_or_udp; - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) - pkt_type &= mbufs[j + i]->packet_type; - - l3_type = pkt_type & RTE_PTYPE_L3_MASK; - tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - - for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; - i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; - i++, pos++) { - rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], - struct rte_ether_hdr *) + - 1); - } - - if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { - em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { - em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else { - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = - em_get_dst_port(qconf, mbufs[j + i], - mbufs[j + i]->port); - process_packet(mbufs[j + i], - &mbufs[j + i]->port); - event_vector_attr_validate(vec, mbufs[j + i]); - } - continue; - } - processx4_step3(&mbufs[j], &dst_port[j]); - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = dst_port[j + i]; - event_vector_attr_validate(vec, mbufs[j + i]); - } - } - - for (; j < vec->nb_elem; j++) { - mbufs[j]->port = - em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); - process_packet(mbufs[j], &mbufs[j]->port); - event_vector_attr_validate(vec, mbufs[j]); - } + l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port, + vec->port, qconf, 1); + else + for (i = 0; i < vec->nb_elem; i++) + l3fwd_em_process_packets(1, &vec->mbufs[i], + &dst_port[i], + vec->mbufs[i]->port, qconf, 1); + + process_event_vector(vec, dst_port); } #endif /* __L3FWD_EM_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index d2f75edb8a..067f23889a 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event 
**events, for (i = 1, j = 0; j < nb_rx; i++, j++) { struct rte_mbuf *mbuf = events[j]->mbuf; + uint16_t port; if (i < nb_rx) { rte_prefetch0(rte_pktmbuf_mtod( events[i]->mbuf, struct rte_ether_hdr *) + 1); } + port = mbuf->port; mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); + if (mbuf->port == BAD_PORT) + mbuf->port = port; } } static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_ports) { + const uint8_t attr_valid = vec->attr_valid; struct rte_mbuf **mbufs = vec->mbufs; int32_t i, j; rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); - if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { if (j < vec->nb_elem) rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); - mbufs[i]->port = - em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); - process_packet(mbufs[i], &mbufs[i]->port); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = em_get_dst_port(qconf, mbufs[i], + attr_valid ? vec->port : + mbufs[i]->port); } + j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != j; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &dst_ports[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &dst_ports[i]); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index 3fe38aada0..e21817c36b 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) } #endif -static inline void -event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) -{ - /* l3fwd application only changes mbuf port while processing */ - if (vec->attr_valid && (vec->port != mbuf->port)) - vec->attr_valid = 0; -} - -static inline void -event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) -{ - if (vec->attr_valid) { - vec->queue = txq; - } else { - int i; - - for (i = 0; i < vec->nb_elem; i++) - rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); - } -} - static inline uint16_t filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, uint16_t nb_pkts) -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
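One subtlety in the l3fwd_em_sequential.h hunk above: event mode has no drop stage after process_packet(), and the Tx adapter routes each mbuf by its port field, so a failed lookup must not leave BAD_PORT behind. A distilled, self-contained model of the save/restore added there (BAD_PORT taken as the (uint16_t)-1 sentinel from l3fwd.h):

#include <assert.h>
#include <stdint.h>

#define BAD_PORT ((uint16_t)-1)	/* sentinel from examples/l3fwd/l3fwd.h */

/* Port an mbuf should carry after lookup: the looked-up destination on
 * a hit, the saved ingress port on a miss. */
static uint16_t
restore_on_miss(uint16_t ingress, uint16_t looked_up)
{
	return looked_up == BAD_PORT ? ingress : looked_up;
}

int main(void)
{
	assert(restore_on_miss(3, 7) == 7);		/* hit: forward */
	assert(restore_on_miss(3, BAD_PORT) == 3);	/* miss: bounce back */
	return 0;
}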
* Re: [PATCH v2 1/5] examples/l3fwd: fix port group mask generation 2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (3 preceding siblings ...) 2022-09-02 9:18 ` [PATCH v2 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-09-08 18:33 ` David Christensen 2022-09-09 5:56 ` [EXT] " Pavan Nikhilesh Bhagavatula 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula 5 siblings, 1 reply; 41+ messages in thread From: David Christensen @ 2022-09-08 18:33 UTC (permalink / raw) To: pbhagavatula, jerinj; +Cc: dev, stable On 9/2/22 2:18 AM, pbhagavatula@marvell.com wrote: > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Fix port group mask generation in altivec, vec_any_eq returns > 0 or 1 while port_groupx4 expects comparison mask result. > > Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") > Cc: stable@dpdk.org > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > --- > v2 Changes: > - Fix PPC, RISC-V, aarch32 compilation. > > examples/common/altivec/port_group.h | 11 +++++++++-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h > index 5e209b02fa..592ef80b7f 100644 > --- a/examples/common/altivec/port_group.h > +++ b/examples/common/altivec/port_group.h > @@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, > uint16_t u16[FWDSTEP + 1]; > uint64_t u64; > } *pnum = (void *)pn; > + union u_vec { > + __vector unsigned short v_us; > + unsigned short s[8]; > + }; > > + union u_vec res; > int32_t v; > > - v = vec_any_eq(dp1, dp2); > - > + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); Altivec vec_cmpeq() is similar to Intel _mm_cmpeq_*(), so this looks right to me. > + res.v_us = dp1; > > + v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) | > + (res.s[3] & 0x8); This can be vectorized too. The Intel _mm_unpacklo_epi16() intrinsic can be replaced with the following Altivec code: extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi16 (__m128i __A, __m128i __B) { return (__m128i) vec_mergeh ((__v8hi)__A, (__v8hi)__B); } The Intel _mm_movemask_ps() intrinsic can be replaced with the following Altivec implementation: /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_ps (__m128 __A) { __vector unsigned long long result; static const __vector unsigned int perm_mask = { #ifdef __LITTLE_ENDIAN__ 0x00204060, 0x80808080, 0x80808080, 0x80808080 #else 0x80808080, 0x80808080, 0x80808080, 0x00204060 #endif }; result = ((__vector unsigned long long) vec_vbpermq ((__vector unsigned char) __A, (__vector unsigned char) perm_mask)); #ifdef __LITTLE_ENDIAN__ return result[1]; #else return result[0]; #endif } Dave ^ permalink raw reply [flat|nested] 41+ messages in thread
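Whichever intrinsics end up in the final version, the contract stays the same: bit j of v must be set exactly when the destination ports of packets j and j+1 match (FWDSTEP == 4, as in the l3fwd headers). A scalar reference, useful for spot-checking any of the vector variants discussed in this subthread:

#include <stdint.h>
#include <stdio.h>

#define FWDSTEP 4

/* Reference mask: bit j set iff ports j and j+1 are equal. */
static int32_t
pair_mask_scalar(const uint16_t dp[FWDSTEP + 1])
{
	int32_t v = 0;
	int j;

	for (j = 0; j < FWDSTEP; j++)
		v |= (int32_t)(dp[j] == dp[j + 1]) << j;
	return v;
}

int main(void)
{
	const uint16_t dp[FWDSTEP + 1] = {1, 1, 2, 2, 2};

	/* pairs (0,1), (2,3) and (3,4) match -> 0b1101 */
	printf("mask = 0x%x\n", pair_mask_scalar(dp));
	return 0;
}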
* RE: [EXT] Re: [PATCH v2 1/5] examples/l3fwd: fix port group mask generation 2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen @ 2022-09-09 5:56 ` Pavan Nikhilesh Bhagavatula 0 siblings, 0 replies; 41+ messages in thread From: Pavan Nikhilesh Bhagavatula @ 2022-09-09 5:56 UTC (permalink / raw) To: David Christensen, Jerin Jacob Kollanukkaran; +Cc: dev, stable > On 9/2/22 2:18 AM, pbhagavatula@marvell.com wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > > > Fix port group mask generation in altivec, vec_any_eq returns > > 0 or 1 while port_groupx4 expects comparison mask result. > > > > Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on > powerpc") > > Cc: stable@dpdk.org > > > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > > --- > > v2 Changes: > > - Fix PPC, RISC-V, aarch32 compilation. > > > > examples/common/altivec/port_group.h | 11 +++++++++-- > > 1 file changed, 9 insertions(+), 2 deletions(-) > > > > diff --git a/examples/common/altivec/port_group.h > b/examples/common/altivec/port_group.h > > index 5e209b02fa..592ef80b7f 100644 > > --- a/examples/common/altivec/port_group.h > > +++ b/examples/common/altivec/port_group.h > > @@ -26,12 +26,19 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t > *lp, > > uint16_t u16[FWDSTEP + 1]; > > uint64_t u64; > > } *pnum = (void *)pn; > > + union u_vec { > > + __vector unsigned short v_us; > > + unsigned short s[8]; > > + }; > > > > + union u_vec res; > > int32_t v; > > > > - v = vec_any_eq(dp1, dp2); > > - > > + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); > > Altivec vec_cmpeq() is similar to Intel _mm_cmpeq_*(), so this looks > right to me. > > > + res.v_us = dp1; > > > > + v = (res.s[0] & 0x1) | (res.s[1] & 0x2) | (res.s[2] & 0x4) | > > + (res.s[3] & 0x8); > > This can be vectorized too. The Intel _mm_unpacklo_epi16() intrinsic > can be replaced with the following Altivec code: > > extern __inline __m128i __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > _mm_unpacklo_epi16 (__m128i __A, __m128i __B) > { > return (__m128i) vec_mergeh ((__v8hi)__A, (__v8hi)__B); > } > > The Intel _mm_movemask_ps() intrinsic can be replaced with the following > Altivec implementation: > > /* Creates a 4-bit mask from the most significant bits of the SPFP > values. */ > extern __inline int __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > _mm_movemask_ps (__m128 __A) > { > __vector unsigned long long result; > static const __vector unsigned int perm_mask = > { > #ifdef __LITTLE_ENDIAN__ > 0x00204060, 0x80808080, 0x80808080, 0x80808080 > #else > 0x80808080, 0x80808080, 0x80808080, 0x00204060 > #endif > }; > > result = ((__vector unsigned long long) > vec_vbpermq ((__vector unsigned char) __A, > (__vector unsigned char) perm_mask)); > > #ifdef __LITTLE_ENDIAN__ > return result[1]; > #else > return result[0]; > #endif > } > Sure I will add this to the next version. > Dave Thanks, Pavan. ^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v3 1/5] examples/l3fwd: fix port group mask generation 2022-09-02 9:18 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (4 preceding siblings ...) 2022-09-08 18:33 ` [PATCH v2 1/5] examples/l3fwd: fix port group mask generation David Christensen @ 2022-09-11 18:12 ` pbhagavatula 2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula ` (4 more replies) 5 siblings, 5 replies; 41+ messages in thread From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw) To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix port group mask generation in altivec, vec_any_eq returns 0 or 1 while port_groupx4 expects comparison mask result. Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v3 Changes: - PPC optimize port mask generation. - Fix aarch32 compilation. v2 Changes: - Fix PPC, RISC-V, aarch32 compilation. examples/common/altivec/port_group.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h index 5e209b02fa..1c05bc025a 100644 --- a/examples/common/altivec/port_group.h +++ b/examples/common/altivec/port_group.h @@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16_t u16[FWDSTEP + 1]; uint64_t u64; } *pnum = (void *)pn; - + __vector unsigned long long result; + const __vector unsigned int perm_mask = {0x00204060, 0x80808080, + 0x80808080, 0x80808080}; int32_t v; - v = vec_any_eq(dp1, dp2); - + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); + dp1 = vec_mergeh(dp1, dp1); + result = (__vector unsigned long long)vec_vbpermq( + (__vector unsigned char)dp1, (__vector unsigned char)perm_mask); + v = result[1]; /* update last port counter. */ lp[0] += gptbl[v].lpv; -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
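A note on how the new constant works, assuming the bit numbering vec_vbpermq() uses (indices count from the most significant bit of the quadword, as in the _mm_movemask_ps port quoted in the review): vec_mergeh(dp1, dp1) widens each 16-bit compare mask into a 32-bit lane, the four low selector bytes of perm_mask (0x00204060) then pick the MSB of each lane, and every 0x80 byte selects a constant zero bit. A tiny host-runnable decoder of those selector bytes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* 0x00204060 split into vbpermq selector bytes */
	const uint8_t sel[4] = {0x00, 0x20, 0x40, 0x60};
	int i;

	for (i = 0; i < 4; i++)
		printf("mask bit %d <- quadword bit %3d (MSB of 32-bit lane %d)\n",
		       i, sel[i], sel[i] / 32);
	return 0;
}

The result is the same 4-bit pair mask the scalar reference above produces, now computed without per-lane scalar extraction.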
* [PATCH v3 2/5] examples/l3fwd: split processing and send stages 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula @ 2022-09-11 18:12 ` pbhagavatula 2022-09-11 18:12 ` [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula ` (3 subsequent siblings) 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw) To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Split packet processing from packet send stage, as send stage is not common for poll and event mode. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++----------- examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++--- examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++------- examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++--- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index e76f2760b0..12b997e477 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, return portid; } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. - */ static inline void -l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t *dst_port, uint16_t portid, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i, j, pos; - uint16_t dst_port[MAX_PKT_BURST]; /* * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets @@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, dst_port[j + i] = em_get_dst_port(qconf, pkts_burst[j + i], portid); } + + for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); } - for (; j < nb_rx; j++) + for (; j < nb_rx; j++) { dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &pkts_burst[j]->port); + } +} - send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
+ */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0); + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } /* @@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, */ int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { + for (j = 0; j < nb_rx; j++) pkts_burst[j] = ev[j]->mbuf; - rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *) + 1); - } for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { @@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, } continue; } - processx4_step3(&pkts_burst[j], &dst_port[j]); + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) pkts_burst[j + i]->port = dst_port[j + i]; diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h index 0c6852a7bb..adb82f1478 100644 --- a/examples/l3fwd/l3fwd_lpm_altivec.h +++ b/examples/l3fwd/l3fwd_lpm_altivec.h @@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint8_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __vector unsigned int dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h index 78ee83b76c..2a68c4c15e 100644 --- a/examples/l3fwd/l3fwd_lpm_neon.h +++ b/examples/l3fwd/l3fwd_lpm_neon.h @@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf, } } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. 
- */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i = 0, j = 0; - uint16_t dst_port[MAX_PKT_BURST]; int32x4_t dip; uint32_t ipv4_flag; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *)); } - for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { for (i = 0; i < FWDSTEP; i++) { rte_prefetch0(rte_pktmbuf_mtod( @@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); } processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); j += FWDSTEP; } @@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, void *)); j++; } - j -= m; /* Classify last up to 3 packets one by one */ switch (m) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); } } +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h index 3f637a23d1..db15030320 100644 --- a/examples/l3fwd/l3fwd_lpm_sse.h +++ b/examples/l3fwd/l3fwd_lpm_sse.h @@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. 
*/ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __m128i dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step2(qconf, dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
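The do_step3 argument is a compile-time constant at every call site, so the compiler folds the unused branch away and each ISA header keeps one processing routine instead of two near-copies. A sketch of the two intended consumers (names from the patch; the surrounding loops are elided):

uint16_t dst_port[MAX_PKT_BURST];

/* Poll mode: send_packets_multi() already performs the mac/TTL step
 * (processx4_step3()/process_packet()) while grouping ports, so step 3
 * is skipped during processing. */
l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, 0);
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);

/* Event mode (wired up in the next patch): no send stage follows, so
 * step 3 must run during processing. */
l3fwd_lpm_process_packets(vec->nb_elem, vec->mbufs, vec->port, dst_port,
			  qconf, 1);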
* [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula 2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula @ 2022-09-11 18:12 ` pbhagavatula 2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula ` (2 subsequent siblings) 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw) To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use lpm vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++ examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++ examples/l3fwd/l3fwd_lpm.c | 38 ++++++++++-------- examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++ examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++ 5 files changed, 213 insertions(+), 16 deletions(-) diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h index 87018f5dbe..e45e138e59 100644 --- a/examples/l3fwd/l3fwd_altivec.h +++ b/examples/l3fwd/l3fwd_altivec.h @@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __vector unsigned short dp1; + __vector unsigned short dp; + + dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); + dp1 = *((__vector unsigned short *)&dst_ports[i]); + res = vec_all_eq(dp1, dp); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_ALTIVEC_H_ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index b93841a16f..3fe38aada0 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -82,6 +82,27 @@ struct l3fwd_event_resources { uint64_t vector_tmo_ns; }; +#if defined(RTE_ARCH_X86) +#include "l3fwd_sse.h" +#elif defined __ARM_NEON +#include "l3fwd_neon.h" +#elif defined(RTE_ARCH_PPC_64) +#include "l3fwd_altivec.h" +#else +static inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + int i; + + for (i = 0; i < nb_elem; i++) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + } + + return dst_ports[0]; +} +#endif + static inline void event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) { @@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) } } +static inline uint16_t +filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, + uint16_t nb_pkts) +{ + uint16_t *des_pos, free = 0; + struct rte_mbuf **pos; + int i; + + /* Filter out and free bad packets */ + for (i = 0; i < nb_pkts; i++) { + if (dst_port[i] == BAD_PORT) { + rte_pktmbuf_free(mbufs[i]); + if (!free) { + pos = &mbufs[i]; + des_pos = &dst_port[i]; + } + free++; + continue; + } + + if (free) { + *pos = mbufs[i]; + pos++; + *des_pos = dst_port[i]; + des_pos++; + } + } + return nb_pkts - free; +} + +static inline void +process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port) +{ + uint16_t port, i; + + vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem); + /* Verify destination array */ + port = 
process_dst_port(dst_port, vec->nb_elem); + if (port == BAD_PORT) { + vec->attr_valid = 0; + for (i = 0; i < vec->nb_elem; i++) { + vec->mbufs[i]->port = dst_port[i]; + rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0); + } + } else { + vec->attr_valid = 1; + vec->port = port; + vec->queue = 0; + } +} struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void); void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf); diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index d1b850dd5b..1652b7c470 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf) +lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf, + uint16_t *dst_port) { struct rte_mbuf **mbufs = vec->mbufs; int i; - /* Process first packet to init vector attributes */ - lpm_process_event_pkt(lconf, mbufs[0]); +#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64 if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; + l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port, + dst_port, lconf, 1); + } else { + for (i = 0; i < vec->nb_elem; i++) + l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port, + &dst_port[i], lconf, 1); } +#else + for (i = 0; i < vec->nb_elem; i++) + dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]); +#endif - for (i = 1; i < vec->nb_elem; i++) { - lpm_process_event_pkt(lconf, mbufs[i]); - event_vector_attr_validate(vec, mbufs[i]); - } + process_event_vector(vec, dst_port); } /* Same eventdev loop for single and burst of vector */ @@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, struct rte_event events[MAX_PKT_BURST]; int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; + uint16_t *dst_port_list; unsigned int lcore_id; if (event_p_id < 0) @@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; - + dst_port_list = + rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_port_list == NULL) + return; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - lpm_process_event_vector(events[i].vec, lconf); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + lpm_process_event_vector(events[i].vec, lconf, + dst_port_list); } if (flags & L3FWD_EVENT_TX_ENQ) { diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h index ce515e0bc4..bf365341fb 100644 --- a/examples/l3fwd/l3fwd_neon.h +++ b/examples/l3fwd/l3fwd_neon.h @@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0; + +#if defined(RTE_ARCH_ARM64) + uint16_t res; + + while (nb_elem > 7) { + uint16x8_t dp = vdupq_n_u16(dst_ports[0]); + uint16x8_t dp1; + + dp1 = vld1q_u16(&dst_ports[i]); + dp1 = vceqq_u16(dp1, dp); + res = vminvq_u16(dp1); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + uint16x4_t dp = vdup_n_u16(dst_ports[0]); + uint16x4_t dp1; + + dp1 = 
vld1_u16(&dst_ports[i]); + dp1 = vceq_u16(dp1, dp); + res = vminv_u16(dp1); + if (!res) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } +#endif + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_NEON_H_ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index 0f0d0323a2..083729cdef 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + res = _mm_movemask_epi8(dp1); + if (res != 0xFFFF) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + res = _mm_movemask_ps((__m128)dp1); + if (res != 0xF) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_SSE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
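All process_dst_port() variants implement one contract: return the burst's common destination port, or BAD_PORT as soon as any entry disagrees. That is what lets process_event_vector() keep a vector's attr_valid/port attribute, or fall back to per-mbuf ports plus rte_event_eth_tx_adapter_txq_set(). A self-contained check of the contract using the scalar fallback copied from the l3fwd_event.h hunk (BAD_PORT assumed to be the (uint16_t)-1 sentinel from l3fwd.h); the SSE/NEON/Altivec fast paths above must agree with it:

#include <assert.h>
#include <stdint.h>

#define BAD_PORT ((uint16_t)-1)

/* Scalar fallback from the patch, for targets without a vector path. */
static uint16_t
process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
{
	int i;

	for (i = 0; i < nb_elem; i++) {
		if (dst_ports[i] != dst_ports[0])
			return BAD_PORT;
	}
	return dst_ports[0];
}

int main(void)
{
	uint16_t uniform[8] = {7, 7, 7, 7, 7, 7, 7, 7};
	uint16_t mixed[8] = {7, 7, 7, 8, 7, 7, 7, 7};

	assert(process_dst_port(uniform, 8) == 7);
	assert(process_dst_port(mixed, 8) == BAD_PORT);
	return 0;
}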
* [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula 2022-09-11 18:12 ` [PATCH v3 2/5] examples/l3fwd: split processing and send stages pbhagavatula 2022-09-11 18:12 ` [PATCH v3 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula @ 2022-09-11 18:12 ` pbhagavatula 2022-10-07 20:03 ` [EXT] " Shijith Thotton 2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 4 siblings, 1 reply; 41+ messages in thread From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix stack overflow when event vector size is greater than MAX_BURST_SIZE. Add missing mac swap and rfc1812 stage. Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_fib.c | 123 ++++++++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 37 deletions(-) diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c index e02e4b3f5a..c4a45bc7f3 100644 --- a/examples/l3fwd/l3fwd_fib.c +++ b/examples/l3fwd/l3fwd_fib.c @@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf, */ #if !defined FIB_SEND_MULTI static inline void -fib_send_single(int nb_tx, struct lcore_conf *qconf, - struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +process_packet(struct rte_mbuf *pkt, uint16_t *hop) { - int32_t j; struct rte_ether_hdr *eth_hdr; - for (j = 0; j < nb_tx; j++) { - /* Run rfc1812 if packet is ipv4 and checks enabled. */ + /* Run rfc1812 if packet is ipv4 and checks enabled. */ #if defined DO_RFC_1812_CHECKS - rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( - pkts_burst[j], struct rte_ether_hdr *) + 1), - &hops[j], pkts_burst[j]->packet_type); + rfc1812_process( + (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( + pkt, struct rte_ether_hdr *) + + 1), + hop, pkt->packet_type); #endif - /* Set MAC addresses. */ - eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *); - *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; - rte_ether_addr_copy(&ports_eth_addr[hops[j]], - ð_hdr->src_addr); + /* Set MAC addresses. */ + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; + rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); +} + +static inline void +fib_send_single(int nb_tx, struct lcore_conf *qconf, + struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +{ + int32_t j; + for (j = 0; j < nb_tx; j++) { + process_packet(pkts_burst[j], &hops[j]); + if (hops[j] == BAD_PORT) { + rte_pktmbuf_free(pkts_burst[j]); + continue; + } /* Send single packet. */ send_single_packet(qconf, pkts_burst[j], hops[j]); } @@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, uint32_t ipv4_arr[MAX_PKT_BURST]; uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; - uint16_t nh; + uint16_t nh, hops[MAX_PKT_BURST]; uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; uint32_t ipv4_arr_assem, ipv6_arr_assem; @@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - events[i].mbuf->port = nh; + hops[i] = nh != FIB_DEFAULT_HOP ? 
+ nh : + events[i].mbuf->port; + process_packet(events[i].mbuf, &hops[i]); + events[i].mbuf->port = hops[i] != BAD_PORT ? + hops[i] : + events[i].mbuf->port; } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -fib_process_event_vector(struct rte_event_vector *vec) +fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, + uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, + uint32_t *ipv4_arr, uint16_t *hops) { - uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; - uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; uint32_t ipv4_arr_assem, ipv6_arr_assem; struct rte_mbuf **mbufs = vec->mbufs; - uint32_t ipv4_arr[MAX_PKT_BURST]; - uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; struct lcore_conf *lconf; uint16_t nh; @@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec) /* Lookup IPv6 hops if IPv6 packets are present. */ if (ipv6_cnt > 0) - rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, - hopsv6, ipv6_cnt); - - if (vec->attr_valid) { - nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; - if (nh != FIB_DEFAULT_HOP) - vec->port = nh; - else - vec->attr_valid = 0; - } + rte_fib6_lookup_bulk( + lconf->ipv6_lookup_struct, + (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, + ipv6_cnt); /* Assign ports looked up in fib depending on IPv4 or IPv6 */ for (i = 0; i < vec->nb_elem; i++) { @@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec) else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - mbufs[i]->port = nh; - event_vector_attr_validate(vec, mbufs[i]); + hops[i] = nh; + else + hops[i] = vec->attr_valid ? vec->port : + vec->mbufs[i]->port; } + +#if defined FIB_SEND_MULTI + uint16_t k; + k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != k; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &hops[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#else + for (i = 0; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#endif + + process_event_vector(vec, hops); } static __rte_always_inline void @@ -496,7 +521,32 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, const uint8_t event_d_id = evt_rsrc->event_d_id; const uint16_t deq_len = evt_rsrc->deq_depth; struct rte_event events[MAX_PKT_BURST]; + uint8_t *type_arr, **ipv6_arr, *ptr; int nb_enq = 0, nb_deq = 0, i; + uint64_t *hopsv4, *hopsv6; + uint32_t *ipv4_arr; + uint16_t *hops; + uintptr_t mem; + + mem = (uintptr_t)rte_zmalloc( + "vector_fib", + (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + + sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + + (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * + evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (mem == 0) + return; + ipv4_arr = (uint32_t *)mem; + type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; + hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; + hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; + hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; + ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; + + ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; + for (i = 0; i < evt_rsrc->vector_size; i++) + ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; if (event_p_id < 0) return; @@ -519,10 +569,9 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - fib_process_event_vector(events[i].vec); - - if (flags & 
L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + fib_process_event_vector(events[i].vec, type_arr, + ipv6_arr, hopsv4, hopsv6, + ipv4_arr, hops); } if (flags & L3FWD_EVENT_TX_ENQ) { -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
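The per-packet fallback added above replaces the old behaviour of silently leaving mbuf->port untouched on a FIB_DEFAULT_HOP result. Distilled into a self-contained helper (the FIB_DEFAULT_HOP value is assumed here; names are illustrative):

#include <assert.h>
#include <stdint.h>

#define FIB_DEFAULT_HOP 999	/* assumed default-route sentinel */

/* Hop a packet should take: the FIB result on a hit, otherwise the
 * vector port attribute when valid, otherwise the mbuf's own port. */
static uint16_t
resolve_hop(uint16_t nh, int attr_valid, uint16_t vec_port,
	    uint16_t mbuf_port)
{
	if (nh != FIB_DEFAULT_HOP)
		return nh;
	return attr_valid ? vec_port : mbuf_port;
}

int main(void)
{
	assert(resolve_hop(3, 1, 5, 7) == 3);			/* FIB hit */
	assert(resolve_hop(FIB_DEFAULT_HOP, 1, 5, 7) == 5);	/* vec->port */
	assert(resolve_hop(FIB_DEFAULT_HOP, 0, 5, 7) == 7);	/* mbuf port */
	return 0;
}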
* RE: [EXT] [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib 2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-10-07 20:03 ` Shijith Thotton 0 siblings, 0 replies; 41+ messages in thread From: Shijith Thotton @ 2022-10-07 20:03 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran Cc: dev, Pavan Nikhilesh Bhagavatula > >Fix stack overflow when event vector size is greater than >MAX_BURST_SIZE. >Add missing mac swap and rfc1812 stage. > >Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> >--- > examples/l3fwd/l3fwd_fib.c | 123 ++++++++++++++++++++++++++----------- > 1 file changed, 86 insertions(+), 37 deletions(-) > >diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c >index e02e4b3f5a..c4a45bc7f3 100644 >--- a/examples/l3fwd/l3fwd_fib.c >+++ b/examples/l3fwd/l3fwd_fib.c >@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf, > */ > #if !defined FIB_SEND_MULTI > static inline void >-fib_send_single(int nb_tx, struct lcore_conf *qconf, >- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) >+process_packet(struct rte_mbuf *pkt, uint16_t *hop) > { >- int32_t j; > struct rte_ether_hdr *eth_hdr; > >- for (j = 0; j < nb_tx; j++) { >- /* Run rfc1812 if packet is ipv4 and checks enabled. */ >+ /* Run rfc1812 if packet is ipv4 and checks enabled. */ > #if defined DO_RFC_1812_CHECKS >- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( >- pkts_burst[j], struct rte_ether_hdr *) + 1), >- &hops[j], pkts_burst[j]->packet_type); >+ rfc1812_process( >+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( >+ pkt, struct rte_ether_hdr *) + >+ 1), >+ hop, pkt->packet_type); > #endif > >- /* Set MAC addresses. */ >- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], >- struct rte_ether_hdr *); >- *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; >- rte_ether_addr_copy(&ports_eth_addr[hops[j]], >- ð_hdr->src_addr); >+ /* Set MAC addresses. */ >+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); >+ *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; >+ rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); >+} >+ >+static inline void >+fib_send_single(int nb_tx, struct lcore_conf *qconf, >+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) >+{ >+ int32_t j; > >+ for (j = 0; j < nb_tx; j++) { >+ process_packet(pkts_burst[j], &hops[j]); >+ if (hops[j] == BAD_PORT) { >+ rte_pktmbuf_free(pkts_burst[j]); >+ continue; >+ } > /* Send single packet. */ > send_single_packet(qconf, pkts_burst[j], hops[j]); > } >@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, > uint32_t ipv4_arr[MAX_PKT_BURST]; > uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; > uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; >- uint16_t nh; >+ uint16_t nh, hops[MAX_PKT_BURST]; > uint8_t type_arr[MAX_PKT_BURST]; > uint32_t ipv4_cnt, ipv6_cnt; > uint32_t ipv4_arr_assem, ipv6_arr_assem; >@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, > else > nh = (uint16_t)hopsv6[ipv6_arr_assem++]; > if (nh != FIB_DEFAULT_HOP) >- events[i].mbuf->port = nh; >+ hops[i] = nh != FIB_DEFAULT_HOP ? >+ nh : >+ events[i].mbuf->port; >+ process_packet(events[i].mbuf, &hops[i]); >+ events[i].mbuf->port = hops[i] != BAD_PORT ? 
>+ hops[i] : >+ events[i].mbuf->port; > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void >*dummy) > } > > static __rte_always_inline void >-fib_process_event_vector(struct rte_event_vector *vec) >+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, >+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, >+ uint32_t *ipv4_arr, uint16_t *hops) > { >- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; >- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; > uint32_t ipv4_arr_assem, ipv6_arr_assem; > struct rte_mbuf **mbufs = vec->mbufs; >- uint32_t ipv4_arr[MAX_PKT_BURST]; >- uint8_t type_arr[MAX_PKT_BURST]; > uint32_t ipv4_cnt, ipv6_cnt; > struct lcore_conf *lconf; > uint16_t nh; >@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector >*vec) > > /* Lookup IPv6 hops if IPv6 packets are present. */ > if (ipv6_cnt > 0) >- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, >- hopsv6, ipv6_cnt); >- >- if (vec->attr_valid) { >- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; >- if (nh != FIB_DEFAULT_HOP) >- vec->port = nh; >- else >- vec->attr_valid = 0; >- } >+ rte_fib6_lookup_bulk( >+ lconf->ipv6_lookup_struct, >+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, >+ ipv6_cnt); > > /* Assign ports looked up in fib depending on IPv4 or IPv6 */ > for (i = 0; i < vec->nb_elem; i++) { >@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec) > else > nh = (uint16_t)hopsv6[ipv6_arr_assem++]; > if (nh != FIB_DEFAULT_HOP) >- mbufs[i]->port = nh; >- event_vector_attr_validate(vec, mbufs[i]); >+ hops[i] = nh; >+ else >+ hops[i] = vec->attr_valid ? vec->port : >+ vec->mbufs[i]->port; > } >+ >+#if defined FIB_SEND_MULTI >+ uint16_t k; >+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); >+ >+ for (i = 0; i != k; i += FWDSTEP) >+ processx4_step3(&vec->mbufs[i], &hops[i]); >+ for (; i < vec->nb_elem; i++) >+ process_packet(vec->mbufs[i], &hops[i]); >+#else >+ for (i = 0; i < vec->nb_elem; i++) >+ process_packet(vec->mbufs[i], &hops[i]); >+#endif >+ >+ process_event_vector(vec, hops); > } > > static __rte_always_inline void >@@ -496,7 +521,32 @@ fib_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > const uint8_t event_d_id = evt_rsrc->event_d_id; > const uint16_t deq_len = evt_rsrc->deq_depth; > struct rte_event events[MAX_PKT_BURST]; >+ uint8_t *type_arr, **ipv6_arr, *ptr; > int nb_enq = 0, nb_deq = 0, i; >+ uint64_t *hopsv4, *hopsv6; >+ uint32_t *ipv4_arr; >+ uint16_t *hops; >+ uintptr_t mem; >+ >+ mem = (uintptr_t)rte_zmalloc( >+ "vector_fib", >+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + >+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + >+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * >+ evt_rsrc->vector_size, >+ RTE_CACHE_LINE_SIZE); Free missing. 
>+ if (mem == 0) >+ return; >+ ipv4_arr = (uint32_t *)mem; >+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; >+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; >+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; >+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; >+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; >+ >+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; >+ for (i = 0; i < evt_rsrc->vector_size; i++) >+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; > > if (event_p_id < 0) > return; >@@ -519,10 +569,9 @@ fib_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > events[i].op = RTE_EVENT_OP_FORWARD; > } > >- fib_process_event_vector(events[i].vec); >- >- if (flags & L3FWD_EVENT_TX_DIRECT) >- event_vector_txq_set(events[i].vec, 0); >+ fib_process_event_vector(events[i].vec, type_arr, >+ ipv6_arr, hopsv4, hopsv6, >+ ipv4_arr, hops); > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >-- >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
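A sketch of one way to address the comment (not the committed fix): hoist the event_p_id check above the allocation, since the patch as posted also returns through `if (event_p_id < 0)` after allocating, and release the scratch area once the dequeue loop exits. Here scratch_sz stands in for the size expression from the patch:

	if (event_p_id < 0)
		return;

	mem = (uintptr_t)rte_zmalloc("vector_fib", scratch_sz,
				     RTE_CACHE_LINE_SIZE);
	if (mem == 0)
		return;

	/* ... carve the per-array pointers and run the dequeue /
	 * process / enqueue loop exactly as in the patch ... */

	rte_free((void *)mem);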
* [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula ` (2 preceding siblings ...) 2022-09-11 18:12 ` [PATCH v3 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-09-11 18:12 ` pbhagavatula 2022-10-07 20:01 ` [EXT] " Shijith Thotton 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 4 siblings, 1 reply; 41+ messages in thread From: pbhagavatula @ 2022-09-11 18:12 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use em vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em.c | 12 +++-- examples/l3fwd/l3fwd_em.h | 29 +++++------ examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- examples/l3fwd/l3fwd_event.h | 21 -------- 5 files changed, 47 insertions(+), 112 deletions(-) diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 10be24c61d..e7b35cfbd9 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; unsigned int lcore_id; + uint16_t *dst_ports; if (event_p_id < 0) return; + dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_ports == NULL) + return; lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; @@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, } #if defined RTE_ARCH_X86 || defined __ARM_NEON - l3fwd_em_process_event_vector(events[i].vec, lconf); + l3fwd_em_process_event_vector(events[i].vec, lconf, + dst_ports); #else l3fwd_em_no_opt_process_event_vector(events[i].vec, - lconf); + lconf, dst_ports); #endif - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); } if (flags & L3FWD_EVENT_TX_ENQ) { diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h index fe2ee59f6a..7d051fc076 100644 --- a/examples/l3fwd/l3fwd_em.h +++ b/examples/l3fwd/l3fwd_em.h @@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid, } } -static __rte_always_inline void +static __rte_always_inline uint16_t l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) { struct rte_ether_hdr *eth_hdr; @@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); else m->port = BAD_PORT; + + return m->port; } /* @@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events, static inline void l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, + uint16_t *dst_ports) { struct rte_mbuf **mbufs = vec->mbufs; int32_t i; @@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); - /* Process first packet to init vector attributes */ - l3fwd_em_simple_process(mbufs[0], qconf); - if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; - } - /* * Prefetch and forward already prefetched packets. 
*/ - for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { + for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { rte_prefetch0( rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *)); - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); } /* Forward remaining prefetched packets */ - for (; i < vec->nb_elem; i++) { - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); - } + for (; i < vec->nb_elem; i++) + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 12b997e477..2e11eefad7 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_port) { - struct rte_mbuf **mbufs = vec->mbufs; - uint16_t dst_port[MAX_PKT_BURST]; - int32_t i, j, n, pos; - - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) - rte_prefetch0( - rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); + uint16_t i; if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - - n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { - uint32_t pkt_type = - RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP; - uint32_t l3_type, tcp_or_udp; - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) - pkt_type &= mbufs[j + i]->packet_type; - - l3_type = pkt_type & RTE_PTYPE_L3_MASK; - tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - - for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; - i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; - i++, pos++) { - rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], - struct rte_ether_hdr *) + - 1); - } - - if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { - em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { - em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else { - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = - em_get_dst_port(qconf, mbufs[j + i], - mbufs[j + i]->port); - process_packet(mbufs[j + i], - &mbufs[j + i]->port); - event_vector_attr_validate(vec, mbufs[j + i]); - } - continue; - } - processx4_step3(&mbufs[j], &dst_port[j]); - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = dst_port[j + i]; - event_vector_attr_validate(vec, mbufs[j + i]); - } - } - - for (; j < vec->nb_elem; j++) { - mbufs[j]->port = - em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); - process_packet(mbufs[j], &mbufs[j]->port); - event_vector_attr_validate(vec, mbufs[j]); - } + l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port, + vec->port, qconf, 1); + else + for (i = 0; i < vec->nb_elem; i++) + l3fwd_em_process_packets(1, &vec->mbufs[i], + &dst_port[i], + vec->mbufs[i]->port, qconf, 1); + + process_event_vector(vec, dst_port); } #endif /* __L3FWD_EM_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index d2f75edb8a..067f23889a 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event 
**events, for (i = 1, j = 0; j < nb_rx; i++, j++) { struct rte_mbuf *mbuf = events[j]->mbuf; + uint16_t port; if (i < nb_rx) { rte_prefetch0(rte_pktmbuf_mtod( events[i]->mbuf, struct rte_ether_hdr *) + 1); } + port = mbuf->port; mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); + if (mbuf->port == BAD_PORT) + mbuf->port = port; } } static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_ports) { + const uint8_t attr_valid = vec->attr_valid; struct rte_mbuf **mbufs = vec->mbufs; int32_t i, j; rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); - if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { if (j < vec->nb_elem) rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); - mbufs[i]->port = - em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); - process_packet(mbufs[i], &mbufs[i]->port); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = em_get_dst_port(qconf, mbufs[i], + attr_valid ? vec->port : + mbufs[i]->port); } + j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != j; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &dst_ports[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &dst_ports[i]); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index 3fe38aada0..e21817c36b 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) } #endif -static inline void -event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) -{ - /* l3fwd application only changes mbuf port while processing */ - if (vec->attr_valid && (vec->port != mbuf->port)) - vec->attr_valid = 0; -} - -static inline void -event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) -{ - if (vec->attr_valid) { - vec->queue = txq; - } else { - int i; - - for (i = 0; i < vec->nb_elem; i++) - rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); - } -} - static inline uint16_t filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, uint16_t nb_pkts) -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
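The v3 refactor above hinges on one idea: the lookup stage only fills a caller-provided dst_ports[] array and has no TX side effects, so poll mode and event mode can attach different send stages to the same processing code. A minimal stand-alone sketch of that pattern, with toy types in place of the DPDK structures (illustration only, not part of the series):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the em/lpm lookup: route even ids to port 0, odd to 1. */
static uint16_t
toy_lookup(uint32_t pkt_id)
{
	return pkt_id & 1;
}

/* Processing stage: writes dst_port[] only, touches no TX state. */
static void
toy_process(const uint32_t *pkts, uint16_t *dst_port, int n)
{
	int i;

	for (i = 0; i < n; i++)
		dst_port[i] = toy_lookup(pkts[i]);
}

int
main(void)
{
	uint32_t pkts[4] = {0, 1, 2, 3};
	uint16_t dst_port[4];
	int i;

	toy_process(pkts, dst_port, 4);
	/* Poll mode would now hand dst_port[] to send_packets_multi();
	 * event mode hands it to process_event_vector() instead. */
	for (i = 0; i < 4; i++)
		printf("pkt %d -> port %u\n", i, dst_port[i]);
	return 0;
}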
* RE: [EXT] [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector 2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-10-07 20:01 ` Shijith Thotton 0 siblings, 0 replies; 41+ messages in thread From: Shijith Thotton @ 2022-10-07 20:01 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran Cc: dev, Pavan Nikhilesh Bhagavatula >Use em vector path to process event vector. > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> >--- > examples/l3fwd/l3fwd_em.c | 12 +++-- > examples/l3fwd/l3fwd_em.h | 29 +++++------ > examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- > examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- > examples/l3fwd/l3fwd_event.h | 21 -------- > 5 files changed, 47 insertions(+), 112 deletions(-) > >diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c >index 10be24c61d..e7b35cfbd9 100644 >--- a/examples/l3fwd/l3fwd_em.c >+++ b/examples/l3fwd/l3fwd_em.c >@@ -852,10 +852,15 @@ em_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > int i, nb_enq = 0, nb_deq = 0; > struct lcore_conf *lconf; > unsigned int lcore_id; >+ uint16_t *dst_ports; > > if (event_p_id < 0) > return; > >+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, >+ RTE_CACHE_LINE_SIZE); Free missing. >+ if (dst_ports == NULL) >+ return; > lcore_id = rte_lcore_id(); > lconf = &lcore_conf[lcore_id]; > >@@ -877,13 +882,12 @@ em_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > } > > #if defined RTE_ARCH_X86 || defined __ARM_NEON >- l3fwd_em_process_event_vector(events[i].vec, lconf); >+ l3fwd_em_process_event_vector(events[i].vec, lconf, >+ dst_ports); > #else > l3fwd_em_no_opt_process_event_vector(events[i].vec, >- lconf); >+ lconf, dst_ports); > #endif >- if (flags & L3FWD_EVENT_TX_DIRECT) >- event_vector_txq_set(events[i].vec, 0); > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h >index fe2ee59f6a..7d051fc076 100644 >--- a/examples/l3fwd/l3fwd_em.h >+++ b/examples/l3fwd/l3fwd_em.h >@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t >portid, > } > } > >-static __rte_always_inline void >+static __rte_always_inline uint16_t > l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) > { > struct rte_ether_hdr *eth_hdr; >@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct >lcore_conf *qconf) > m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); > else > m->port = BAD_PORT; >+ >+ return m->port; > } > > /* >@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct >rte_event **events, > > static inline void > l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, >- struct lcore_conf *qconf) >+ struct lcore_conf *qconf, >+ uint16_t *dst_ports) > { > struct rte_mbuf **mbufs = vec->mbufs; > int32_t i; >@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct >rte_event_vector *vec, > for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) > rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); > >- /* Process first packet to init vector attributes */ >- l3fwd_em_simple_process(mbufs[0], qconf); >- if (vec->attr_valid) { >- if (mbufs[0]->port != BAD_PORT) >- vec->port = mbufs[0]->port; >- else >- vec->attr_valid = 0; >- } >- > /* > * Prefetch and forward already prefetched packets. 
> */ >- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { >+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { > rte_prefetch0( > rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void >*)); >- l3fwd_em_simple_process(mbufs[i], qconf); >- event_vector_attr_validate(vec, mbufs[i]); >+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); > } > > /* Forward remaining prefetched packets */ >- for (; i < vec->nb_elem; i++) { >- l3fwd_em_simple_process(mbufs[i], qconf); >- event_vector_attr_validate(vec, mbufs[i]); >- } >+ for (; i < vec->nb_elem; i++) >+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); >+ >+ process_event_vector(vec, dst_ports); > } > > #endif /* __L3FWD_EM_H__ */ >diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h >index 12b997e477..2e11eefad7 100644 >--- a/examples/l3fwd/l3fwd_em_hlm.h >+++ b/examples/l3fwd/l3fwd_em_hlm.h >@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event >**ev, > > static inline void > l3fwd_em_process_event_vector(struct rte_event_vector *vec, >- struct lcore_conf *qconf) >+ struct lcore_conf *qconf, uint16_t *dst_port) > { >- struct rte_mbuf **mbufs = vec->mbufs; >- uint16_t dst_port[MAX_PKT_BURST]; >- int32_t i, j, n, pos; >- >- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) >- rte_prefetch0( >- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); >+ uint16_t i; > > if (vec->attr_valid) >- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); >- >- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); >- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { >- uint32_t pkt_type = >- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | >RTE_PTYPE_L4_UDP; >- uint32_t l3_type, tcp_or_udp; >- >- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) >- pkt_type &= mbufs[j + i]->packet_type; >- >- l3_type = pkt_type & RTE_PTYPE_L3_MASK; >- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | >RTE_PTYPE_L4_UDP); >- >- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; >- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; >- i++, pos++) { >- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], >- struct rte_ether_hdr *) + >- 1); >- } >- >- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { >- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], >- &dst_port[j]); >- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { >- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], >- &dst_port[j]); >- } else { >- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { >- mbufs[j + i]->port = >- em_get_dst_port(qconf, mbufs[j + i], >- mbufs[j + i]->port); >- process_packet(mbufs[j + i], >- &mbufs[j + i]->port); >- event_vector_attr_validate(vec, mbufs[j + i]); >- } >- continue; >- } >- processx4_step3(&mbufs[j], &dst_port[j]); >- >- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { >- mbufs[j + i]->port = dst_port[j + i]; >- event_vector_attr_validate(vec, mbufs[j + i]); >- } >- } >- >- for (; j < vec->nb_elem; j++) { >- mbufs[j]->port = >- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); >- process_packet(mbufs[j], &mbufs[j]->port); >- event_vector_attr_validate(vec, mbufs[j]); >- } >+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, >dst_port, >+ vec->port, qconf, 1); >+ else >+ for (i = 0; i < vec->nb_elem; i++) >+ l3fwd_em_process_packets(1, &vec->mbufs[i], >+ &dst_port[i], >+ vec->mbufs[i]->port, qconf, 1); >+ >+ process_event_vector(vec, dst_port); > } > > #endif /* __L3FWD_EM_HLM_H__ */ >diff --git a/examples/l3fwd/l3fwd_em_sequential.h >b/examples/l3fwd/l3fwd_em_sequential.h >index d2f75edb8a..067f23889a 100644 >--- 
a/examples/l3fwd/l3fwd_em_sequential.h >+++ b/examples/l3fwd/l3fwd_em_sequential.h >@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event >**events, > > for (i = 1, j = 0; j < nb_rx; i++, j++) { > struct rte_mbuf *mbuf = events[j]->mbuf; >+ uint16_t port; > > if (i < nb_rx) { > rte_prefetch0(rte_pktmbuf_mtod( > events[i]->mbuf, > struct rte_ether_hdr *) + 1); > } >+ port = mbuf->port; > mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); > process_packet(mbuf, &mbuf->port); >+ if (mbuf->port == BAD_PORT) >+ mbuf->port = port; > } > } > > static inline void > l3fwd_em_process_event_vector(struct rte_event_vector *vec, >- struct lcore_conf *qconf) >+ struct lcore_conf *qconf, uint16_t *dst_ports) > { >+ const uint8_t attr_valid = vec->attr_valid; > struct rte_mbuf **mbufs = vec->mbufs; > int32_t i, j; > > rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); > >- if (vec->attr_valid) >- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); >- > for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { > if (j < vec->nb_elem) > rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], > struct rte_ether_hdr *) + > 1); >- mbufs[i]->port = >- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); >- process_packet(mbufs[i], &mbufs[i]->port); >- event_vector_attr_validate(vec, mbufs[i]); >+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i], >+ attr_valid ? vec->port : >+ mbufs[i]->port); > } >+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); >+ >+ for (i = 0; i != j; i += FWDSTEP) >+ processx4_step3(&vec->mbufs[i], &dst_ports[i]); >+ for (; i < vec->nb_elem; i++) >+ process_packet(vec->mbufs[i], &dst_ports[i]); >+ >+ process_event_vector(vec, dst_ports); > } > > #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ >diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h >index 3fe38aada0..e21817c36b 100644 >--- a/examples/l3fwd/l3fwd_event.h >+++ b/examples/l3fwd/l3fwd_event.h >@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t >nb_elem) > } > #endif > >-static inline void >-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf >*mbuf) >-{ >- /* l3fwd application only changes mbuf port while processing */ >- if (vec->attr_valid && (vec->port != mbuf->port)) >- vec->attr_valid = 0; >-} >- >-static inline void >-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) >-{ >- if (vec->attr_valid) { >- vec->queue = txq; >- } else { >- int i; >- >- for (i = 0; i < vec->nb_elem; i++) >- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); >- } >-} >- > static inline uint16_t > filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, > uint16_t nb_pkts) >-- >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
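The "Free missing." comment above points at the dst_ports allocation leaking on worker exit; v4 below adds the matching rte_free(). The lifetime pattern, sketched (rte_zmalloc()/rte_free() are the real DPDK calls, the remaining names are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <rte_common.h>
#include <rte_malloc.h>

extern volatile bool force_quit;	/* as in l3fwd */

static void
worker_loop(uint16_t vector_size)
{
	uint16_t *dst_ports;

	dst_ports = rte_zmalloc("dst_ports",
				sizeof(uint16_t) * vector_size,
				RTE_CACHE_LINE_SIZE);
	if (dst_ports == NULL)
		return;

	while (!force_quit) {
		/* dequeue events, fill dst_ports[], enqueue or tx ... */
	}

	rte_free(dst_ports);	/* the free that v3 was missing */
}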
* [PATCH v4 1/5] examples/l3fwd: fix port group mask generation 2022-09-11 18:12 ` [PATCH v3 " pbhagavatula ` (3 preceding siblings ...) 2022-09-11 18:12 ` [PATCH v3 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-10-11 9:08 ` pbhagavatula 2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula ` (4 more replies) 4 siblings, 5 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw) To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix port group mask generation in altivec, vec_any_eq returns 0 or 1 while port_groupx4 expects comparison mask result. Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v4 Changes: - Fix missing `rte_free`. v3 Changes: - PPC optimize port mask generation. - Fix aarch32 compilation. v2 Changes: - Fix PPC, RISC-V, aarch32 compilation. examples/common/altivec/port_group.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h index 5e209b02fa..1c05bc025a 100644 --- a/examples/common/altivec/port_group.h +++ b/examples/common/altivec/port_group.h @@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16_t u16[FWDSTEP + 1]; uint64_t u64; } *pnum = (void *)pn; - + __vector unsigned long long result; + const __vector unsigned int perm_mask = {0x00204060, 0x80808080, + 0x80808080, 0x80808080}; int32_t v; - v = vec_any_eq(dp1, dp2); - + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); + dp1 = vec_mergeh(dp1, dp1); + result = (__vector unsigned long long)vec_vbpermq( + (__vector unsigned char)dp1, (__vector unsigned char)perm_mask); + v = result[1]; /* update last port counter. */ lp[0] += gptbl[v].lpv; -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
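For reference, a scalar model of the value the fixed AltiVec sequence must produce; illustration only, not part of the patch:

#include <stdint.h>

/*
 * port_groupx4() indexes gptbl[] with a 4-bit mask where bit i is set
 * when lane i of dp1 equals lane i of dp2. vec_any_eq() collapses all
 * four lanes into a single 0/1, which is the bug fixed above.
 */
static int32_t
port_cmp_mask(const uint16_t dp1[4], const uint16_t dp2[4])
{
	int32_t v = 0;
	int i;

	for (i = 0; i < 4; i++)
		v |= (int32_t)(dp1[i] == dp2[i]) << i;

	return v;	/* 0x0 .. 0xf */
}

The vector sequence computes the same four bits in one go: vec_cmpeq() turns each equal lane into all-ones, vec_mergeh() widens the lanes, and vec_vbpermq() gathers one bit per lane via perm_mask.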
* [PATCH v4 2/5] examples/l3fwd: split processing and send stages 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula @ 2022-10-11 9:08 ` pbhagavatula 2022-10-11 9:08 ` [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula ` (3 subsequent siblings) 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw) To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Split packet processing from packet send stage, as send stage is not common for poll and event mode. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++----------- examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++--- examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++------- examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++--- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index e76f2760b0..12b997e477 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, return portid; } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. - */ static inline void -l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t *dst_port, uint16_t portid, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i, j, pos; - uint16_t dst_port[MAX_PKT_BURST]; /* * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets @@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, dst_port[j + i] = em_get_dst_port(qconf, pkts_burst[j + i], portid); } + + for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); } - for (; j < nb_rx; j++) + for (; j < nb_rx; j++) { dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &pkts_burst[j]->port); + } +} - send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
+ */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0); + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } /* @@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, */ int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { + for (j = 0; j < nb_rx; j++) pkts_burst[j] = ev[j]->mbuf; - rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *) + 1); - } for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { @@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, } continue; } - processx4_step3(&pkts_burst[j], &dst_port[j]); + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) pkts_burst[j + i]->port = dst_port[j + i]; diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h index 0c6852a7bb..adb82f1478 100644 --- a/examples/l3fwd/l3fwd_lpm_altivec.h +++ b/examples/l3fwd/l3fwd_lpm_altivec.h @@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint8_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __vector unsigned int dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h index 78ee83b76c..2a68c4c15e 100644 --- a/examples/l3fwd/l3fwd_lpm_neon.h +++ b/examples/l3fwd/l3fwd_lpm_neon.h @@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf, } } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. 
- */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i = 0, j = 0; - uint16_t dst_port[MAX_PKT_BURST]; int32x4_t dip; uint32_t ipv4_flag; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *)); } - for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { for (i = 0; i < FWDSTEP; i++) { rte_prefetch0(rte_pktmbuf_mtod( @@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); } processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); j += FWDSTEP; } @@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, void *)); j++; } - j -= m; /* Classify last up to 3 packets one by one */ switch (m) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); } } +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h index 3f637a23d1..db15030320 100644 --- a/examples/l3fwd/l3fwd_lpm_sse.h +++ b/examples/l3fwd/l3fwd_lpm_sse.h @@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. 
*/ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __m128i dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step2(qconf, dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
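The const uint8_t do_step3 flag above is a specialization idiom: the processing body is inline and every caller passes a literal 0 or 1, so the compiler can drop the dead branch in each instantiation instead of testing it per packet. A toy model of the pattern (names illustrative):

#include <stdint.h>

static inline void
toy_process_burst(uint16_t *dst_port, int n, const uint8_t do_step3)
{
	int i;

	for (i = 0; i < n; i++) {
		dst_port[i] = 0;	/* stand-in for the route lookup */
		if (do_step3) {
			/* MAC swap + RFC1812 checks would run here */
		}
	}
}

/* Poll mode: step3 runs later, inside send_packets_multi(). */
static inline void
toy_process_poll(uint16_t *dst_port, int n)
{
	toy_process_burst(dst_port, n, 0);
}

/* Event mode: fold step3 into processing; there is no separate send stage. */
static inline void
toy_process_event(uint16_t *dst_port, int n)
{
	toy_process_burst(dst_port, n, 1);
}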
* [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula @ 2022-10-11 9:08 ` pbhagavatula 2022-10-11 9:08 ` [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula ` (2 subsequent siblings) 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw) To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use lpm vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++ examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++ examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++-------- examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++ examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++ 5 files changed, 214 insertions(+), 16 deletions(-) diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h index 87018f5dbe..e45e138e59 100644 --- a/examples/l3fwd/l3fwd_altivec.h +++ b/examples/l3fwd/l3fwd_altivec.h @@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __vector unsigned short dp1; + __vector unsigned short dp; + + dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); + dp1 = *((__vector unsigned short *)&dst_ports[i]); + res = vec_all_eq(dp1, dp); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_ALTIVEC_H_ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index b93841a16f..3fe38aada0 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -82,6 +82,27 @@ struct l3fwd_event_resources { uint64_t vector_tmo_ns; }; +#if defined(RTE_ARCH_X86) +#include "l3fwd_sse.h" +#elif defined __ARM_NEON +#include "l3fwd_neon.h" +#elif defined(RTE_ARCH_PPC_64) +#include "l3fwd_altivec.h" +#else +static inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + int i; + + for (i = 0; i < nb_elem; i++) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + } + + return dst_ports[0]; +} +#endif + static inline void event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) { @@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) } } +static inline uint16_t +filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, + uint16_t nb_pkts) +{ + uint16_t *des_pos, free = 0; + struct rte_mbuf **pos; + int i; + + /* Filter out and free bad packets */ + for (i = 0; i < nb_pkts; i++) { + if (dst_port[i] == BAD_PORT) { + rte_pktmbuf_free(mbufs[i]); + if (!free) { + pos = &mbufs[i]; + des_pos = &dst_port[i]; + } + free++; + continue; + } + + if (free) { + *pos = mbufs[i]; + pos++; + *des_pos = dst_port[i]; + des_pos++; + } + } + return nb_pkts - free; +} + +static inline void +process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port) +{ + uint16_t port, i; + + vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem); + /* Verify 
destination array */ + port = process_dst_port(dst_port, vec->nb_elem); + if (port == BAD_PORT) { + vec->attr_valid = 0; + for (i = 0; i < vec->nb_elem; i++) { + vec->mbufs[i]->port = dst_port[i]; + rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0); + } + } else { + vec->attr_valid = 1; + vec->port = port; + vec->queue = 0; + } +} struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void); void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf); diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 22d7f61a42..5172979c72 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf) +lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf, + uint16_t *dst_port) { struct rte_mbuf **mbufs = vec->mbufs; int i; - /* Process first packet to init vector attributes */ - lpm_process_event_pkt(lconf, mbufs[0]); +#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64 if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; + l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port, + dst_port, lconf, 1); + } else { + for (i = 0; i < vec->nb_elem; i++) + l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port, + &dst_port[i], lconf, 1); } +#else + for (i = 0; i < vec->nb_elem; i++) + dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]); +#endif - for (i = 1; i < vec->nb_elem; i++) { - lpm_process_event_pkt(lconf, mbufs[i]); - event_vector_attr_validate(vec, mbufs[i]); - } + process_event_vector(vec, dst_port); } /* Same eventdev loop for single and burst of vector */ @@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, struct rte_event events[MAX_PKT_BURST]; int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; + uint16_t *dst_port_list; unsigned int lcore_id; if (event_p_id < 0) @@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; - + dst_port_list = + rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_port_list == NULL) + return; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - lpm_process_event_vector(events[i].vec, lconf); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + lpm_process_event_vector(events[i].vec, lconf, + dst_port_list); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_port_list); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h index ce515e0bc4..bf365341fb 100644 --- a/examples/l3fwd/l3fwd_neon.h +++ b/examples/l3fwd/l3fwd_neon.h @@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0; + +#if defined(RTE_ARCH_ARM64) + uint16_t res; + + while (nb_elem > 7) { + uint16x8_t dp = vdupq_n_u16(dst_ports[0]); + uint16x8_t dp1; + + dp1 = 
vld1q_u16(&dst_ports[i]); + dp1 = vceqq_u16(dp1, dp); + res = vminvq_u16(dp1); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + uint16x4_t dp = vdup_n_u16(dst_ports[0]); + uint16x4_t dp1; + + dp1 = vld1_u16(&dst_ports[i]); + dp1 = vceq_u16(dp1, dp); + res = vminv_u16(dp1); + if (!res) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } +#endif + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_NEON_H_ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index 0f0d0323a2..083729cdef 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + res = _mm_movemask_epi8(dp1); + if (res != 0xFFFF) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + res = _mm_movemask_ps((__m128)dp1); + if (res != 0xF) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_SSE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
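filter_bad_packets() and process_dst_port() added above together decide whether a vector can keep its common-port attribute. A stand-alone scalar model of the two steps, with plain arrays in place of rte_event_vector (in l3fwd the mbuf pointers are compacted alongside the ports):

#include <stdint.h>
#include <stdio.h>

#define BAD_PORT UINT16_MAX

/* Step 1: drop BAD_PORT entries by in-place compaction. */
static uint16_t
compact_good(uint16_t *dst, uint16_t n)
{
	uint16_t r, w = 0;

	for (r = 0; r < n; r++)
		if (dst[r] != BAD_PORT)
			dst[w++] = dst[r];
	return w;
}

/* Step 2: one destination for the whole vector, or BAD_PORT if mixed. */
static uint16_t
uniform_port(const uint16_t *dst, uint16_t n)
{
	uint16_t i;

	for (i = 1; i < n; i++)
		if (dst[i] != dst[0])
			return BAD_PORT;	/* mixed: per-mbuf txq set */
	return dst[0];		/* uniform: set vec->port, attr_valid = 1 */
}

int
main(void)
{
	uint16_t dst[5] = {1, 1, BAD_PORT, 1, 1};
	uint16_t n = compact_good(dst, 5);

	printf("kept %u pkts, uniform port %u\n", n, uniform_port(dst, n));
	return 0;
}

The SSE/NEON/AltiVec process_dst_port() variants above only accelerate step 2: compare 8 (or 4) lanes against dst_ports[0] at a time and bail out on the first mismatch.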
* [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 2022-10-11 9:08 ` [PATCH v4 2/5] examples/l3fwd: split processing and send stages pbhagavatula 2022-10-11 9:08 ` [PATCH v4 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula @ 2022-10-11 9:08 ` pbhagavatula 2022-10-11 9:08 ` [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix stack overflow when event vector size is greater than MAX_BURST_SIZE. Add missing mac swap and rfc1812 stage. Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 39 deletions(-) diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c index b82e0c0354..407e9def71 100644 --- a/examples/l3fwd/l3fwd_fib.c +++ b/examples/l3fwd/l3fwd_fib.c @@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf, */ #if !defined FIB_SEND_MULTI static inline void -fib_send_single(int nb_tx, struct lcore_conf *qconf, - struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +process_packet(struct rte_mbuf *pkt, uint16_t *hop) { - int32_t j; struct rte_ether_hdr *eth_hdr; - for (j = 0; j < nb_tx; j++) { - /* Run rfc1812 if packet is ipv4 and checks enabled. */ + /* Run rfc1812 if packet is ipv4 and checks enabled. */ #if defined DO_RFC_1812_CHECKS - rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( - pkts_burst[j], struct rte_ether_hdr *) + 1), - &hops[j], pkts_burst[j]->packet_type); + rfc1812_process( + (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( + pkt, struct rte_ether_hdr *) + + 1), + hop, pkt->packet_type); #endif - /* Set MAC addresses. */ - eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *); - *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; - rte_ether_addr_copy(&ports_eth_addr[hops[j]], - ð_hdr->src_addr); + /* Set MAC addresses. */ + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; + rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); +} +static inline void +fib_send_single(int nb_tx, struct lcore_conf *qconf, + struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +{ + int32_t j; + + for (j = 0; j < nb_tx; j++) { + process_packet(pkts_burst[j], &hops[j]); + if (hops[j] == BAD_PORT) { + rte_pktmbuf_free(pkts_burst[j]); + continue; + } /* Send single packet. */ send_single_packet(qconf, pkts_burst[j], hops[j]); } @@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, uint32_t ipv4_arr[MAX_PKT_BURST]; uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; - uint16_t nh; + uint16_t nh, hops[MAX_PKT_BURST]; uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; uint32_t ipv4_arr_assem, ipv6_arr_assem; @@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - events[i].mbuf->port = nh; + hops[i] = nh != FIB_DEFAULT_HOP ? 
+ nh : + events[i].mbuf->port; + process_packet(events[i].mbuf, &hops[i]); + events[i].mbuf->port = hops[i] != BAD_PORT ? + hops[i] : + events[i].mbuf->port; } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -fib_process_event_vector(struct rte_event_vector *vec) +fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, + uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, + uint32_t *ipv4_arr, uint16_t *hops) { - uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; - uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; uint32_t ipv4_arr_assem, ipv6_arr_assem; struct rte_mbuf **mbufs = vec->mbufs; - uint32_t ipv4_arr[MAX_PKT_BURST]; - uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; struct lcore_conf *lconf; uint16_t nh; @@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec) /* Lookup IPv6 hops if IPv6 packets are present. */ if (ipv6_cnt > 0) - rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, - hopsv6, ipv6_cnt); - - if (vec->attr_valid) { - nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; - if (nh != FIB_DEFAULT_HOP) - vec->port = nh; - else - vec->attr_valid = 0; - } + rte_fib6_lookup_bulk( + lconf->ipv6_lookup_struct, + (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, + ipv6_cnt); /* Assign ports looked up in fib depending on IPv4 or IPv6 */ for (i = 0; i < vec->nb_elem; i++) { @@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec) else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - mbufs[i]->port = nh; - event_vector_attr_validate(vec, mbufs[i]); + hops[i] = nh; + else + hops[i] = vec->attr_valid ? vec->port : + vec->mbufs[i]->port; } + +#if defined FIB_SEND_MULTI + uint16_t k; + k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != k; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &hops[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#else + for (i = 0; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#endif + + process_event_vector(vec, hops); } static __rte_always_inline void @@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, const uint8_t event_d_id = evt_rsrc->event_d_id; const uint16_t deq_len = evt_rsrc->deq_depth; struct rte_event events[MAX_PKT_BURST]; + uint8_t *type_arr, **ipv6_arr, *ptr; int nb_enq = 0, nb_deq = 0, i; - - if (event_p_id < 0) + uint64_t *hopsv4, *hopsv6; + uint32_t *ipv4_arr; + uint16_t *hops; + uintptr_t mem; + + mem = (uintptr_t)rte_zmalloc( + "vector_fib", + (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + + sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + + (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * + evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (mem == 0) return; + ipv4_arr = (uint32_t *)mem; + type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; + hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; + hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; + hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; + ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; + + ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; + for (i = 0; i < evt_rsrc->vector_size; i++) + ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; + + if (event_p_id < 0) { + rte_free(mem); + return; + } RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, rte_lcore_id()); @@ -519,10 +571,9 @@ fib_event_loop_vector(struct 
l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - fib_process_event_vector(events[i].vec); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + fib_process_event_vector(events[i].vec, type_arr, + ipv6_arr, hopsv4, hopsv6, + ipv4_arr, hops); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(mem); } int __rte_noinline -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
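One detail in fib_event_loop_vector() above: all per-vector scratch arrays are carved out of a single rte_zmalloc() block, so every exit path has exactly one pointer to free. A sketch of the carving pattern, with plain calloc() standing in for rte_zmalloc() and the ipv6 address table omitted for brevity; the slices here are ordered by descending alignment so each stays naturally aligned for any vector size:

#include <stdint.h>
#include <stdlib.h>

struct fib_scratch {
	uint64_t *hopsv4;
	uint64_t *hopsv6;
	uint32_t *ipv4_arr;
	uint16_t *hops;
	uint8_t *type_arr;
	void *mem;	/* the single block to free */
};

static int
fib_scratch_init(struct fib_scratch *s, size_t n)
{
	uint8_t *p = calloc(n, 2 * sizeof(uint64_t) + sizeof(uint32_t) +
			       sizeof(uint16_t) + sizeof(uint8_t));

	if (p == NULL)
		return -1;
	s->mem = p;
	s->hopsv4 = (uint64_t *)p;	p += n * sizeof(uint64_t);
	s->hopsv6 = (uint64_t *)p;	p += n * sizeof(uint64_t);
	s->ipv4_arr = (uint32_t *)p;	p += n * sizeof(uint32_t);
	s->hops = (uint16_t *)p;	p += n * sizeof(uint16_t);
	s->type_arr = p;
	return 0;
}

static void
fib_scratch_fini(struct fib_scratch *s)
{
	free(s->mem);	/* one call releases all five arrays */
}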
* [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (2 preceding siblings ...) 2022-10-11 9:08 ` [PATCH v4 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-10-11 9:08 ` pbhagavatula 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-11 9:08 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use em vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em.c | 13 +++-- examples/l3fwd/l3fwd_em.h | 29 +++++------ examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- examples/l3fwd/l3fwd_event.h | 21 -------- 5 files changed, 48 insertions(+), 112 deletions(-) diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index a203dc9e46..35de31157e 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; unsigned int lcore_id; + uint16_t *dst_ports; if (event_p_id < 0) return; + dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_ports == NULL) + return; lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; @@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, } #if defined RTE_ARCH_X86 || defined __ARM_NEON - l3fwd_em_process_event_vector(events[i].vec, lconf); + l3fwd_em_process_event_vector(events[i].vec, lconf, + dst_ports); #else l3fwd_em_no_opt_process_event_vector(events[i].vec, - lconf); + lconf, dst_ports); #endif - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_ports); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h index fe2ee59f6a..7d051fc076 100644 --- a/examples/l3fwd/l3fwd_em.h +++ b/examples/l3fwd/l3fwd_em.h @@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid, } } -static __rte_always_inline void +static __rte_always_inline uint16_t l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) { struct rte_ether_hdr *eth_hdr; @@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); else m->port = BAD_PORT; + + return m->port; } /* @@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events, static inline void l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, + uint16_t *dst_ports) { struct rte_mbuf **mbufs = vec->mbufs; int32_t i; @@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); - /* Process first packet to init vector attributes */ - l3fwd_em_simple_process(mbufs[0], qconf); - if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = 
mbufs[0]->port; - else - vec->attr_valid = 0; - } - /* * Prefetch and forward already prefetched packets. */ - for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { + for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { rte_prefetch0( rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *)); - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); } /* Forward remaining prefetched packets */ - for (; i < vec->nb_elem; i++) { - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); - } + for (; i < vec->nb_elem; i++) + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 12b997e477..2e11eefad7 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_port) { - struct rte_mbuf **mbufs = vec->mbufs; - uint16_t dst_port[MAX_PKT_BURST]; - int32_t i, j, n, pos; - - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) - rte_prefetch0( - rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); + uint16_t i; if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - - n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { - uint32_t pkt_type = - RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP; - uint32_t l3_type, tcp_or_udp; - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) - pkt_type &= mbufs[j + i]->packet_type; - - l3_type = pkt_type & RTE_PTYPE_L3_MASK; - tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - - for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; - i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; - i++, pos++) { - rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], - struct rte_ether_hdr *) + - 1); - } - - if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { - em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { - em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else { - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = - em_get_dst_port(qconf, mbufs[j + i], - mbufs[j + i]->port); - process_packet(mbufs[j + i], - &mbufs[j + i]->port); - event_vector_attr_validate(vec, mbufs[j + i]); - } - continue; - } - processx4_step3(&mbufs[j], &dst_port[j]); - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = dst_port[j + i]; - event_vector_attr_validate(vec, mbufs[j + i]); - } - } - - for (; j < vec->nb_elem; j++) { - mbufs[j]->port = - em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); - process_packet(mbufs[j], &mbufs[j]->port); - event_vector_attr_validate(vec, mbufs[j]); - } + l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port, + vec->port, qconf, 1); + else + for (i = 0; i < vec->nb_elem; i++) + l3fwd_em_process_packets(1, &vec->mbufs[i], + &dst_port[i], + vec->mbufs[i]->port, qconf, 1); + + process_event_vector(vec, dst_port); } #endif /* __L3FWD_EM_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index d2f75edb8a..067f23889a 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ 
b/examples/l3fwd/l3fwd_em_sequential.h @@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events, for (i = 1, j = 0; j < nb_rx; i++, j++) { struct rte_mbuf *mbuf = events[j]->mbuf; + uint16_t port; if (i < nb_rx) { rte_prefetch0(rte_pktmbuf_mtod( events[i]->mbuf, struct rte_ether_hdr *) + 1); } + port = mbuf->port; mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); + if (mbuf->port == BAD_PORT) + mbuf->port = port; } } static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_ports) { + const uint8_t attr_valid = vec->attr_valid; struct rte_mbuf **mbufs = vec->mbufs; int32_t i, j; rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); - if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { if (j < vec->nb_elem) rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); - mbufs[i]->port = - em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); - process_packet(mbufs[i], &mbufs[i]->port); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = em_get_dst_port(qconf, mbufs[i], + attr_valid ? vec->port : + mbufs[i]->port); } + j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != j; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &dst_ports[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &dst_ports[i]); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index 3fe38aada0..e21817c36b 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) } #endif -static inline void -event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) -{ - /* l3fwd application only changes mbuf port while processing */ - if (vec->attr_valid && (vec->port != mbuf->port)) - vec->attr_valid = 0; -} - -static inline void -event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) -{ - if (vec->attr_valid) { - vec->queue = txq; - } else { - int i; - - for (i = 0; i < vec->nb_elem; i++) - rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); - } -} - static inline uint16_t filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, uint16_t nb_pkts) -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
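The dispatch in l3fwd_em_process_event_vector() above has a simple shape: when the vector's port attribute is valid the whole burst shares one source port and takes the batched path; otherwise each packet is processed as a burst of one with its own port. A toy model (names illustrative):

#include <stdint.h>

/* Stand-in for l3fwd_em_process_packets(): all nb packets share src_port. */
static void
toy_em_process(int nb, uint16_t src_port, uint16_t *dst)
{
	int i;

	for (i = 0; i < nb; i++)
		dst[i] = src_port ^ 1;	/* toy lookup keyed on source port */
}

static void
toy_em_vector_dispatch(int nb, const uint16_t *pkt_ports, int attr_valid,
		       uint16_t common_port, uint16_t *dst)
{
	if (attr_valid) {
		toy_em_process(nb, common_port, dst);	/* one batched call */
	} else {
		int i;

		for (i = 0; i < nb; i++)	/* burst of one per packet */
			toy_em_process(1, pkt_ports[i], &dst[i]);
	}
}

The per-packet fallback keeps lookups correct when the vector aggregates packets from several rx ports; the batched call is the common case when the event device guarantees one port per vector.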
* [PATCH v5 1/5] examples/l3fwd: fix port group mask generation 2022-10-11 9:08 ` [PATCH v4 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (3 preceding siblings ...) 2022-10-11 9:08 ` [PATCH v4 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-10-11 10:12 ` pbhagavatula 2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula ` (5 more replies) 4 siblings, 6 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw) To: jerinj, David Christensen; +Cc: dev, Pavan Nikhilesh, stable From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix port group mask generation in altivec, vec_any_eq returns 0 or 1 while port_groupx4 expects comparison mask result. Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- v5 Changes: - Fix compilation errors. v4 Changes: - Fix missing `rte_free`. v3 Changes: - PPC optimize port mask generation. - Fix aarch32 compilation. v2 Changes: - Fix PPC, RISC-V, aarch32 compilation. examples/common/altivec/port_group.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h index 5e209b02fa..1c05bc025a 100644 --- a/examples/common/altivec/port_group.h +++ b/examples/common/altivec/port_group.h @@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16_t u16[FWDSTEP + 1]; uint64_t u64; } *pnum = (void *)pn; - + __vector unsigned long long result; + const __vector unsigned int perm_mask = {0x00204060, 0x80808080, + 0x80808080, 0x80808080}; int32_t v; - v = vec_any_eq(dp1, dp2); - + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); + dp1 = vec_mergeh(dp1, dp1); + result = (__vector unsigned long long)vec_vbpermq( + (__vector unsigned char)dp1, (__vector unsigned char)perm_mask); + v = result[1]; /* update last port counter. */ lp[0] += gptbl[v].lpv; -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v5 2/5] examples/l3fwd: split processing and send stages 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula @ 2022-10-11 10:12 ` pbhagavatula 2022-10-17 12:06 ` [EXT] " Shijith Thotton 2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula ` (4 subsequent siblings) 5 siblings, 1 reply; 41+ messages in thread From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw) To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Split packet processing from packet send stage, as send stage is not common for poll and event mode. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++----------- examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++--- examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++------- examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++--- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index e76f2760b0..12b997e477 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, return portid; } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. - */ static inline void -l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t *dst_port, uint16_t portid, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i, j, pos; - uint16_t dst_port[MAX_PKT_BURST]; /* * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets @@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, dst_port[j + i] = em_get_dst_port(qconf, pkts_burst[j + i], portid); } + + for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); } - for (; j < nb_rx; j++) + for (; j < nb_rx; j++) { dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &pkts_burst[j]->port); + } +} - send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
+ */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0); + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } /* @@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, */ int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { + for (j = 0; j < nb_rx; j++) pkts_burst[j] = ev[j]->mbuf; - rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *) + 1); - } for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { @@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, } continue; } - processx4_step3(&pkts_burst[j], &dst_port[j]); + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) pkts_burst[j + i]->port = dst_port[j + i]; diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h index 0c6852a7bb..adb82f1478 100644 --- a/examples/l3fwd/l3fwd_lpm_altivec.h +++ b/examples/l3fwd/l3fwd_lpm_altivec.h @@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint8_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __vector unsigned int dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h index 78ee83b76c..2a68c4c15e 100644 --- a/examples/l3fwd/l3fwd_lpm_neon.h +++ b/examples/l3fwd/l3fwd_lpm_neon.h @@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf, } } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. 
- */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i = 0, j = 0; - uint16_t dst_port[MAX_PKT_BURST]; int32x4_t dip; uint32_t ipv4_flag; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *)); } - for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { for (i = 0; i < FWDSTEP; i++) { rte_prefetch0(rte_pktmbuf_mtod( @@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); } processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); j += FWDSTEP; } @@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, void *)); j++; } - j -= m; /* Classify last up to 3 packets one by one */ switch (m) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); } } +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h index 3f637a23d1..db15030320 100644 --- a/examples/l3fwd/l3fwd_lpm_sse.h +++ b/examples/l3fwd/l3fwd_lpm_sse.h @@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. 
*/ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __m128i dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step2(qconf, dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
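The refactor above hinges on one pattern: the lookup stage writes destinations into a caller-provided dst_port[] array, and a const do_step3 argument — always a literal 0 or 1 at the call sites — lets the compiler specialize the inlined body per mode, so poll mode gets a branch-free lookup-only loop and event mode gets the rewrite step folded in. A minimal, self-contained sketch of that pattern; lookup_one() and the +100 "rewrite" are placeholders, not part of the patch:

#include <stdint.h>
#include <stdio.h>

#define BURST 8

/* Stand-in for lpm_get_dst_port()/em_get_dst_port(). */
static inline uint16_t
lookup_one(uint16_t idx)
{
	return idx & 3;
}

/*
 * One worker shared by both modes. Because do_step3 is a constant at
 * each call site and the function is inlined, the dead branch is
 * removed, yielding two specializations from a single body.
 */
static inline void
process_burst(uint16_t *dst_port, int nb, const uint8_t do_step3)
{
	int i;

	for (i = 0; i < nb; i++) {
		dst_port[i] = lookup_one((uint16_t)i); /* lookup stage */
		if (do_step3)
			dst_port[i] += 100; /* stand-in for MAC/TTL rewrite */
	}
}

int
main(void)
{
	uint16_t dst_port[BURST];

	process_burst(dst_port, BURST, 0); /* poll mode: send stage separate */
	process_burst(dst_port, BURST, 1); /* event mode: rewrite inline */
	printf("%u\n", dst_port[0]);
	return 0;
}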
* RE: [EXT] [PATCH v5 2/5] examples/l3fwd: split processing and send stages 2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula @ 2022-10-17 12:06 ` Shijith Thotton 0 siblings, 0 replies; 41+ messages in thread From: Shijith Thotton @ 2022-10-17 12:06 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh Bhagavatula > >Split packet processing from packet send stage, as send stage >is not common for poll and event mode. > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> >--- > examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++----------- > examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++--- > examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++------- > examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++--- > 4 files changed, 95 insertions(+), 29 deletions(-) > >diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h >index e76f2760b0..12b997e477 100644 >--- a/examples/l3fwd/l3fwd_em_hlm.h >+++ b/examples/l3fwd/l3fwd_em_hlm.h >@@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct >rte_mbuf *pkt, > return portid; > } > >-/* >- * Buffer optimized handling of packets, invoked >- * from main_loop. >- */ > static inline void >-l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, >- uint16_t portid, struct lcore_conf *qconf) >+l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, >+ uint16_t *dst_port, uint16_t portid, >+ struct lcore_conf *qconf, const uint8_t do_step3) > { > int32_t i, j, pos; >- uint16_t dst_port[MAX_PKT_BURST]; > > /* > * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets >@@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf >**pkts_burst, > dst_port[j + i] = em_get_dst_port(qconf, > pkts_burst[j + i], portid); > } >+ >+ for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += >FWDSTEP) >+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); > } > >- for (; j < nb_rx; j++) >+ for (; j < nb_rx; j++) { > dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &pkts_burst[j]->port); >+ } >+} > >- send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); >+/* >+ * Buffer optimized handling of packets, invoked >+ * from main_loop. 
>+ */ >+static inline void >+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t >portid, >+ struct lcore_conf *qconf) >+{ >+ uint16_t dst_port[MAX_PKT_BURST]; > >+ l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, >0); >+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); > } > > /* >@@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event >**ev, > */ > int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); > >- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { >+ for (j = 0; j < nb_rx; j++) > pkts_burst[j] = ev[j]->mbuf; >- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], >- struct rte_ether_hdr *) + 1); >- } > > for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { > >@@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event >**ev, > } > continue; > } >- processx4_step3(&pkts_burst[j], &dst_port[j]); >+ for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP) >+ processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); > > for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) > pkts_burst[j + i]->port = dst_port[j + i]; >diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h >b/examples/l3fwd/l3fwd_lpm_altivec.h >index 0c6852a7bb..adb82f1478 100644 >--- a/examples/l3fwd/l3fwd_lpm_altivec.h >+++ b/examples/l3fwd/l3fwd_lpm_altivec.h >@@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf, > * from main_loop. > */ > static inline void >-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, >- uint8_t portid, struct lcore_conf *qconf) >+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, >+ uint8_t portid, uint16_t *dst_port, >+ struct lcore_conf *qconf, const uint8_t do_step3) > { > int32_t j; >- uint16_t dst_port[MAX_PKT_BURST]; > __vector unsigned int dip[MAX_PKT_BURST / FWDSTEP]; > uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; > const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); >@@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf >**pkts_burst, > ipv4_flag[j / FWDSTEP], > portid, &pkts_burst[j], &dst_port[j]); > >+ if (do_step3) >+ for (j = 0; j != k; j += FWDSTEP) >+ processx4_step3(&pkts_burst[j], &dst_port[j]); >+ > /* Classify last up to 3 packets one by one */ > switch (nb_rx % FWDSTEP) { > case 3: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fall-through */ > case 2: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fall-through */ > case 1: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fall-through */ > } >+} >+ >+static inline void >+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t >portid, >+ struct lcore_conf *qconf) >+{ >+ uint16_t dst_port[MAX_PKT_BURST]; > >+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, >+ 0); > send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); > } > >diff --git a/examples/l3fwd/l3fwd_lpm_neon.h >b/examples/l3fwd/l3fwd_lpm_neon.h >index 78ee83b76c..2a68c4c15e 100644 >--- a/examples/l3fwd/l3fwd_lpm_neon.h >+++ b/examples/l3fwd/l3fwd_lpm_neon.h >@@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf, > } > } > >-/* >- * Buffer optimized handling of packets, invoked >- * from main_loop. 
>- */ > static inline void >-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, >- uint16_t portid, struct lcore_conf *qconf) >+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, >+ uint16_t portid, uint16_t *dst_port, >+ struct lcore_conf *qconf, const uint8_t do_step3) > { > int32_t i = 0, j = 0; >- uint16_t dst_port[MAX_PKT_BURST]; > int32x4_t dip; > uint32_t ipv4_flag; > const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); >@@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf >**pkts_burst, > rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], > void *)); > } >- > for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { > for (i = 0; i < FWDSTEP; i++) { > rte_prefetch0(rte_pktmbuf_mtod( >@@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf >**pkts_burst, > processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); > processx4_step2(qconf, dip, ipv4_flag, portid, > &pkts_burst[j], &dst_port[j]); >+ if (do_step3) >+ processx4_step3(&pkts_burst[j], &dst_port[j]); > } > > processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); > processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], > &dst_port[j]); >+ if (do_step3) >+ processx4_step3(&pkts_burst[j], &dst_port[j]); > > j += FWDSTEP; > } >@@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf >**pkts_burst, > void *)); > j++; > } >- > j -= m; > /* Classify last up to 3 packets one by one */ > switch (m) { > case 3: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], > portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fallthrough */ > case 2: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], > portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fallthrough */ > case 1: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], > portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > } > } >+} >+ >+/* >+ * Buffer optimized handling of packets, invoked >+ * from main_loop. >+ */ >+static inline void >+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t >portid, >+ struct lcore_conf *qconf) >+{ >+ uint16_t dst_port[MAX_PKT_BURST]; > >+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, >+ 0); > send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); > } > >diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h >index 3f637a23d1..db15030320 100644 >--- a/examples/l3fwd/l3fwd_lpm_sse.h >+++ b/examples/l3fwd/l3fwd_lpm_sse.h >@@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf, > * from main_loop. 
> */ > static inline void >-l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, >- uint16_t portid, struct lcore_conf *qconf) >+l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, >+ uint16_t portid, uint16_t *dst_port, >+ struct lcore_conf *qconf, const uint8_t do_step3) > { > int32_t j; >- uint16_t dst_port[MAX_PKT_BURST]; > __m128i dip[MAX_PKT_BURST / FWDSTEP]; > uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; > const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); >@@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf >**pkts_burst, > processx4_step2(qconf, dip[j / FWDSTEP], > ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], >&dst_port[j]); > >+ if (do_step3) >+ for (j = 0; j != k; j += FWDSTEP) >+ processx4_step3(&pkts_burst[j], &dst_port[j]); >+ > /* Classify last up to 3 packets one by one */ > switch (nb_rx % FWDSTEP) { > case 3: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fall-through */ > case 2: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > /* fall-through */ > case 1: > dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); >+ if (do_step3) >+ process_packet(pkts_burst[j], &dst_port[j]); > j++; > } >+} >+ >+static inline void >+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t >portid, >+ struct lcore_conf *qconf) >+{ >+ uint16_t dst_port[MAX_PKT_BURST]; > >+ l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, >+ 0); > send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); > } > >-- >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
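The loops quoted above also preserve l3fwd's software-prefetch pipeline: warm up PREFETCH_OFFSET entries, prefetch element i + PREFETCH_OFFSET while processing element i, then drain the already-prefetched tail. A stripped-down sketch of that loop shape — the array and the work() body are stand-ins, only the structure follows the code:

#include <stdint.h>

#define PREFETCH_OFFSET 3

static inline void
work(uint32_t *v)
{
	*v += 1; /* stand-in for per-packet processing */
}

static void
process_all(uint32_t *arr, int n)
{
	int i;

	/* Warm-up: issue the first few prefetches before any work. */
	for (i = 0; i < PREFETCH_OFFSET && i < n; i++)
		__builtin_prefetch(&arr[i]);

	/* Steady state: prefetch ahead while processing current. */
	for (i = 0; i < n - PREFETCH_OFFSET; i++) {
		__builtin_prefetch(&arr[i + PREFETCH_OFFSET]);
		work(&arr[i]);
	}

	/* Drain: the remaining elements were already prefetched. */
	for (; i < n; i++)
		work(&arr[i]);
}

int
main(void)
{
	uint32_t a[16] = { 0 };

	process_all(a, 16);
	return 0;
}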
* [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula @ 2022-10-11 10:12 ` pbhagavatula 2022-10-17 12:06 ` [EXT] " Shijith Thotton 2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula ` (3 subsequent siblings) 5 siblings, 1 reply; 41+ messages in thread From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw) To: jerinj, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use lpm vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++ examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++ examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++-------- examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++ examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++ 5 files changed, 214 insertions(+), 16 deletions(-) diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h index 87018f5dbe..e45e138e59 100644 --- a/examples/l3fwd/l3fwd_altivec.h +++ b/examples/l3fwd/l3fwd_altivec.h @@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __vector unsigned short dp1; + __vector unsigned short dp; + + dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); + dp1 = *((__vector unsigned short *)&dst_ports[i]); + res = vec_all_eq(dp1, dp); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_ALTIVEC_H_ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index b93841a16f..3fe38aada0 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -82,6 +82,27 @@ struct l3fwd_event_resources { uint64_t vector_tmo_ns; }; +#if defined(RTE_ARCH_X86) +#include "l3fwd_sse.h" +#elif defined __ARM_NEON +#include "l3fwd_neon.h" +#elif defined(RTE_ARCH_PPC_64) +#include "l3fwd_altivec.h" +#else +static inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + int i; + + for (i = 0; i < nb_elem; i++) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + } + + return dst_ports[0]; +} +#endif + static inline void event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) { @@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) } } +static inline uint16_t +filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, + uint16_t nb_pkts) +{ + uint16_t *des_pos, free = 0; + struct rte_mbuf **pos; + int i; + + /* Filter out and free bad packets */ + for (i = 0; i < nb_pkts; i++) { + if (dst_port[i] == BAD_PORT) { + rte_pktmbuf_free(mbufs[i]); + if (!free) { + pos = &mbufs[i]; + des_pos = &dst_port[i]; + } + free++; + continue; + } + + if (free) { + *pos = mbufs[i]; + pos++; + *des_pos = dst_port[i]; + des_pos++; + } + } + return nb_pkts - free; +} + +static inline void +process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port) +{ + uint16_t port, i; + + vec->nb_elem = 
filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem); + /* Verify destination array */ + port = process_dst_port(dst_port, vec->nb_elem); + if (port == BAD_PORT) { + vec->attr_valid = 0; + for (i = 0; i < vec->nb_elem; i++) { + vec->mbufs[i]->port = dst_port[i]; + rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0); + } + } else { + vec->attr_valid = 1; + vec->port = port; + vec->queue = 0; + } +} struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void); void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf); diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 22d7f61a42..5172979c72 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf) +lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf, + uint16_t *dst_port) { struct rte_mbuf **mbufs = vec->mbufs; int i; - /* Process first packet to init vector attributes */ - lpm_process_event_pkt(lconf, mbufs[0]); +#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64 if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; + l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port, + dst_port, lconf, 1); + } else { + for (i = 0; i < vec->nb_elem; i++) + l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port, + &dst_port[i], lconf, 1); } +#else + for (i = 0; i < vec->nb_elem; i++) + dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]); +#endif - for (i = 1; i < vec->nb_elem; i++) { - lpm_process_event_pkt(lconf, mbufs[i]); - event_vector_attr_validate(vec, mbufs[i]); - } + process_event_vector(vec, dst_port); } /* Same eventdev loop for single and burst of vector */ @@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, struct rte_event events[MAX_PKT_BURST]; int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; + uint16_t *dst_port_list; unsigned int lcore_id; if (event_p_id < 0) @@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; - + dst_port_list = + rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_port_list == NULL) + return; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - lpm_process_event_vector(events[i].vec, lconf); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + lpm_process_event_vector(events[i].vec, lconf, + dst_port_list); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_port_list); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h index ce515e0bc4..bf365341fb 100644 --- a/examples/l3fwd/l3fwd_neon.h +++ b/examples/l3fwd/l3fwd_neon.h @@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0; + +#if defined(RTE_ARCH_ARM64) + uint16_t res; + + while (nb_elem > 7) { + uint16x8_t 
dp = vdupq_n_u16(dst_ports[0]); + uint16x8_t dp1; + + dp1 = vld1q_u16(&dst_ports[i]); + dp1 = vceqq_u16(dp1, dp); + res = vminvq_u16(dp1); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + uint16x4_t dp = vdup_n_u16(dst_ports[0]); + uint16x4_t dp1; + + dp1 = vld1_u16(&dst_ports[i]); + dp1 = vceq_u16(dp1, dp); + res = vminv_u16(dp1); + if (!res) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } +#endif + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_NEON_H_ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index 0f0d0323a2..083729cdef 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + res = _mm_movemask_epi8(dp1); + if (res != 0xFFFF) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + res = _mm_movemask_ps((__m128)dp1); + if (res != 0xF) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_SSE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
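All three SIMD variants of process_dst_port() above implement the contract that the scalar fallback states directly: return the shared destination if every entry equals dst_ports[0], otherwise BAD_PORT. A self-checking scalar sketch of that contract; the BAD_PORT value here is a placeholder for the demo, not l3fwd's definition:

#include <stdint.h>
#include <assert.h>

#define BAD_PORT 0xFFFF /* placeholder; l3fwd defines its own value */

static uint16_t
process_dst_port_scalar(const uint16_t *dst_ports, uint16_t nb_elem)
{
	uint16_t i;

	for (i = 0; i < nb_elem; i++)
		if (dst_ports[i] != dst_ports[0])
			return BAD_PORT;
	return dst_ports[0];
}

int
main(void)
{
	uint16_t same[4] = { 7, 7, 7, 7 };
	uint16_t mixed[4] = { 7, 7, 3, 7 };

	assert(process_dst_port_scalar(same, 4) == 7);
	assert(process_dst_port_scalar(mixed, 4) == BAD_PORT);
	return 0;
}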
* RE: [EXT] [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector 2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula @ 2022-10-17 12:06 ` Shijith Thotton 0 siblings, 0 replies; 41+ messages in thread From: Shijith Thotton @ 2022-10-17 12:06 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh Bhagavatula > >Use lpm vector path to process event vector. > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> >--- > examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++ > examples/l3fwd/l3fwd_event.h | 71 >++++++++++++++++++++++++++++++++++ > examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++-------- > examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++ > examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++ > 5 files changed, 214 insertions(+), 16 deletions(-) > >diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h >index 87018f5dbe..e45e138e59 100644 >--- a/examples/l3fwd/l3fwd_altivec.h >+++ b/examples/l3fwd/l3fwd_altivec.h >@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct >rte_mbuf **pkts_burst, > } > } > >+static __rte_always_inline uint16_t >+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) >+{ >+ uint16_t i = 0, res; >+ >+ while (nb_elem > 7) { >+ __vector unsigned short dp1; >+ __vector unsigned short dp; >+ >+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); >+ dp1 = *((__vector unsigned short *)&dst_ports[i]); >+ res = vec_all_eq(dp1, dp); >+ if (!res) >+ return BAD_PORT; >+ >+ nb_elem -= 8; >+ i += 8; >+ } >+ >+ while (nb_elem) { >+ if (dst_ports[i] != dst_ports[0]) >+ return BAD_PORT; >+ nb_elem--; >+ i++; >+ } >+ >+ return dst_ports[0]; >+} >+ > #endif /* _L3FWD_ALTIVEC_H_ */ >diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h >index b93841a16f..3fe38aada0 100644 >--- a/examples/l3fwd/l3fwd_event.h >+++ b/examples/l3fwd/l3fwd_event.h >@@ -82,6 +82,27 @@ struct l3fwd_event_resources { > uint64_t vector_tmo_ns; > }; > >+#if defined(RTE_ARCH_X86) >+#include "l3fwd_sse.h" >+#elif defined __ARM_NEON >+#include "l3fwd_neon.h" >+#elif defined(RTE_ARCH_PPC_64) >+#include "l3fwd_altivec.h" >+#else >+static inline uint16_t >+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) >+{ >+ int i; >+ >+ for (i = 0; i < nb_elem; i++) { >+ if (dst_ports[i] != dst_ports[0]) >+ return BAD_PORT; >+ } >+ >+ return dst_ports[0]; >+} >+#endif >+ > static inline void > event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf >*mbuf) > { >@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, >uint16_t txq) > } > } > >+static inline uint16_t >+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, >+ uint16_t nb_pkts) >+{ >+ uint16_t *des_pos, free = 0; >+ struct rte_mbuf **pos; >+ int i; >+ >+ /* Filter out and free bad packets */ >+ for (i = 0; i < nb_pkts; i++) { >+ if (dst_port[i] == BAD_PORT) { >+ rte_pktmbuf_free(mbufs[i]); >+ if (!free) { >+ pos = &mbufs[i]; >+ des_pos = &dst_port[i]; >+ } >+ free++; >+ continue; >+ } >+ >+ if (free) { >+ *pos = mbufs[i]; >+ pos++; >+ *des_pos = dst_port[i]; >+ des_pos++; >+ } >+ } > >+ return nb_pkts - free; >+} >+ >+static inline void >+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port) >+{ >+ uint16_t port, i; >+ >+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec- 
>>nb_elem); >+ /* Verify destination array */ >+ port = process_dst_port(dst_port, vec->nb_elem); >+ if (port == BAD_PORT) { >+ vec->attr_valid = 0; >+ for (i = 0; i < vec->nb_elem; i++) { >+ vec->mbufs[i]->port = dst_port[i]; >+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0); >+ } >+ } else { >+ vec->attr_valid = 1; >+ vec->port = port; >+ vec->queue = 0; >+ } >+} > > struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void); > void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf); >diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c >index 22d7f61a42..5172979c72 100644 >--- a/examples/l3fwd/l3fwd_lpm.c >+++ b/examples/l3fwd/l3fwd_lpm.c >@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void >*dummy) > } > > static __rte_always_inline void >-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf >*lconf) >+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf >*lconf, >+ uint16_t *dst_port) > { > struct rte_mbuf **mbufs = vec->mbufs; > int i; > >- /* Process first packet to init vector attributes */ >- lpm_process_event_pkt(lconf, mbufs[0]); >+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined >RTE_ARCH_PPC_64 > if (vec->attr_valid) { >- if (mbufs[0]->port != BAD_PORT) >- vec->port = mbufs[0]->port; >- else >- vec->attr_valid = 0; >+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port, >+ dst_port, lconf, 1); >+ } else { >+ for (i = 0; i < vec->nb_elem; i++) >+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port, >+ &dst_port[i], lconf, 1); > } >+#else >+ for (i = 0; i < vec->nb_elem; i++) >+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]); >+#endif > >- for (i = 1; i < vec->nb_elem; i++) { >- lpm_process_event_pkt(lconf, mbufs[i]); >- event_vector_attr_validate(vec, mbufs[i]); >- } >+ process_event_vector(vec, dst_port); > } > > /* Same eventdev loop for single and burst of vector */ >@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > struct rte_event events[MAX_PKT_BURST]; > int i, nb_enq = 0, nb_deq = 0; > struct lcore_conf *lconf; >+ uint16_t *dst_port_list; > unsigned int lcore_id; > > if (event_p_id < 0) >@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > > lcore_id = rte_lcore_id(); > lconf = &lcore_conf[lcore_id]; >- >+ dst_port_list = >+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, >+ RTE_CACHE_LINE_SIZE); >+ if (dst_port_list == NULL) >+ return; > RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); > > while (!force_quit) { >@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > events[i].op = RTE_EVENT_OP_FORWARD; > } > >- lpm_process_event_vector(events[i].vec, lconf); >- >- if (flags & L3FWD_EVENT_TX_DIRECT) >- event_vector_txq_set(events[i].vec, 0); >+ lpm_process_event_vector(events[i].vec, lconf, >+ dst_port_list); > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > > l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, > nb_deq, 1); >+ rte_free(dst_port_list); > } > > int __rte_noinline >diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h >index ce515e0bc4..bf365341fb 100644 >--- a/examples/l3fwd/l3fwd_neon.h >+++ b/examples/l3fwd/l3fwd_neon.h >@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct >rte_mbuf **pkts_burst, > } > } > >+static __rte_always_inline uint16_t >+process_dst_port(uint16_t *dst_ports, 
uint16_t nb_elem) >+{ >+ uint16_t i = 0; >+ >+#if defined(RTE_ARCH_ARM64) >+ uint16_t res; >+ >+ while (nb_elem > 7) { >+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]); >+ uint16x8_t dp1; >+ >+ dp1 = vld1q_u16(&dst_ports[i]); >+ dp1 = vceqq_u16(dp1, dp); >+ res = vminvq_u16(dp1); >+ if (!res) >+ return BAD_PORT; >+ >+ nb_elem -= 8; >+ i += 8; >+ } >+ >+ while (nb_elem > 3) { >+ uint16x4_t dp = vdup_n_u16(dst_ports[0]); >+ uint16x4_t dp1; >+ >+ dp1 = vld1_u16(&dst_ports[i]); >+ dp1 = vceq_u16(dp1, dp); >+ res = vminv_u16(dp1); >+ if (!res) >+ return BAD_PORT; >+ >+ nb_elem -= 4; >+ i += 4; >+ } >+#endif >+ >+ while (nb_elem) { >+ if (dst_ports[i] != dst_ports[0]) >+ return BAD_PORT; >+ nb_elem--; >+ i++; >+ } >+ >+ return dst_ports[0]; >+} >+ > #endif /* _L3FWD_NEON_H_ */ >diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h >index 0f0d0323a2..083729cdef 100644 >--- a/examples/l3fwd/l3fwd_sse.h >+++ b/examples/l3fwd/l3fwd_sse.h >@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct >rte_mbuf **pkts_burst, > } > } > >+static __rte_always_inline uint16_t >+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) >+{ >+ uint16_t i = 0, res; >+ >+ while (nb_elem > 7) { >+ __m128i dp = _mm_set1_epi16(dst_ports[0]); >+ __m128i dp1; >+ >+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); >+ dp1 = _mm_cmpeq_epi16(dp1, dp); >+ res = _mm_movemask_epi8(dp1); >+ if (res != 0xFFFF) >+ return BAD_PORT; >+ >+ nb_elem -= 8; >+ i += 8; >+ } >+ >+ while (nb_elem > 3) { >+ __m128i dp = _mm_set1_epi16(dst_ports[0]); >+ __m128i dp1; >+ >+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); >+ dp1 = _mm_cmpeq_epi16(dp1, dp); >+ dp1 = _mm_unpacklo_epi16(dp1, dp1); >+ res = _mm_movemask_ps((__m128)dp1); >+ if (res != 0xF) >+ return BAD_PORT; >+ >+ nb_elem -= 4; >+ i += 4; >+ } >+ >+ while (nb_elem) { >+ if (dst_ports[i] != dst_ports[0]) >+ return BAD_PORT; >+ nb_elem--; >+ i++; >+ } >+ >+ return dst_ports[0]; >+} >+ > #endif /* _L3FWD_SSE_H_ */ >-- >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
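filter_bad_packets() in the quoted hunk is a stable in-place compaction: entries whose destination is BAD_PORT are freed, and the survivors (mbuf and port together) slide down over the holes. The same idea on plain arrays, simplified to copy unconditionally where the patch skips copies until the first hole; BAD_PORT is again a placeholder:

#include <stdint.h>
#include <assert.h>

#define BAD_PORT 0xFFFF /* placeholder value */

/* Drop entries whose port is BAD_PORT; return the new element count. */
static uint16_t
compact(uint32_t *ids, uint16_t *ports, uint16_t n)
{
	uint16_t i, out = 0;

	for (i = 0; i < n; i++) {
		if (ports[i] == BAD_PORT)
			continue; /* l3fwd frees the mbuf here */
		ids[out] = ids[i];
		ports[out] = ports[i];
		out++;
	}
	return out;
}

int
main(void)
{
	uint32_t ids[5] = { 10, 11, 12, 13, 14 };
	uint16_t ports[5] = { 1, BAD_PORT, 2, BAD_PORT, 3 };
	uint16_t n = compact(ids, ports, 5);

	assert(n == 3 && ids[1] == 12 && ports[2] == 3);
	return 0;
}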
* [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula 2022-10-11 10:12 ` [PATCH v5 2/5] examples/l3fwd: split processing and send stages pbhagavatula 2022-10-11 10:12 ` [PATCH v5 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula @ 2022-10-11 10:12 ` pbhagavatula 2022-10-17 12:06 ` [EXT] " Shijith Thotton 2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula ` (2 subsequent siblings) 5 siblings, 1 reply; 41+ messages in thread From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix stack overflow when event vector size is greater than MAX_BURST_SIZE. Add missing mac swap and rfc1812 stage. Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 39 deletions(-) diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c index b82e0c0354..edc0dd69b9 100644 --- a/examples/l3fwd/l3fwd_fib.c +++ b/examples/l3fwd/l3fwd_fib.c @@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf, */ #if !defined FIB_SEND_MULTI static inline void -fib_send_single(int nb_tx, struct lcore_conf *qconf, - struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +process_packet(struct rte_mbuf *pkt, uint16_t *hop) { - int32_t j; struct rte_ether_hdr *eth_hdr; - for (j = 0; j < nb_tx; j++) { - /* Run rfc1812 if packet is ipv4 and checks enabled. */ + /* Run rfc1812 if packet is ipv4 and checks enabled. */ #if defined DO_RFC_1812_CHECKS - rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( - pkts_burst[j], struct rte_ether_hdr *) + 1), - &hops[j], pkts_burst[j]->packet_type); + rfc1812_process( + (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( + pkt, struct rte_ether_hdr *) + + 1), + hop, pkt->packet_type); #endif - /* Set MAC addresses. */ - eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *); - *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; - rte_ether_addr_copy(&ports_eth_addr[hops[j]], - ð_hdr->src_addr); + /* Set MAC addresses. */ + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; + rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); +} +static inline void +fib_send_single(int nb_tx, struct lcore_conf *qconf, + struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +{ + int32_t j; + + for (j = 0; j < nb_tx; j++) { + process_packet(pkts_burst[j], &hops[j]); + if (hops[j] == BAD_PORT) { + rte_pktmbuf_free(pkts_burst[j]); + continue; + } /* Send single packet. */ send_single_packet(qconf, pkts_burst[j], hops[j]); } @@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, uint32_t ipv4_arr[MAX_PKT_BURST]; uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; - uint16_t nh; + uint16_t nh, hops[MAX_PKT_BURST]; uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; uint32_t ipv4_arr_assem, ipv6_arr_assem; @@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - events[i].mbuf->port = nh; + hops[i] = nh != FIB_DEFAULT_HOP ? 
+ nh : + events[i].mbuf->port; + process_packet(events[i].mbuf, &hops[i]); + events[i].mbuf->port = hops[i] != BAD_PORT ? + hops[i] : + events[i].mbuf->port; } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -fib_process_event_vector(struct rte_event_vector *vec) +fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, + uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, + uint32_t *ipv4_arr, uint16_t *hops) { - uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; - uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; uint32_t ipv4_arr_assem, ipv6_arr_assem; struct rte_mbuf **mbufs = vec->mbufs; - uint32_t ipv4_arr[MAX_PKT_BURST]; - uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; struct lcore_conf *lconf; uint16_t nh; @@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec) /* Lookup IPv6 hops if IPv6 packets are present. */ if (ipv6_cnt > 0) - rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, - hopsv6, ipv6_cnt); - - if (vec->attr_valid) { - nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; - if (nh != FIB_DEFAULT_HOP) - vec->port = nh; - else - vec->attr_valid = 0; - } + rte_fib6_lookup_bulk( + lconf->ipv6_lookup_struct, + (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, + ipv6_cnt); /* Assign ports looked up in fib depending on IPv4 or IPv6 */ for (i = 0; i < vec->nb_elem; i++) { @@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec) else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - mbufs[i]->port = nh; - event_vector_attr_validate(vec, mbufs[i]); + hops[i] = nh; + else + hops[i] = vec->attr_valid ? vec->port : + vec->mbufs[i]->port; } + +#if defined FIB_SEND_MULTI + uint16_t k; + k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != k; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &hops[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#else + for (i = 0; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#endif + + process_event_vector(vec, hops); } static __rte_always_inline void @@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, const uint8_t event_d_id = evt_rsrc->event_d_id; const uint16_t deq_len = evt_rsrc->deq_depth; struct rte_event events[MAX_PKT_BURST]; + uint8_t *type_arr, **ipv6_arr, *ptr; int nb_enq = 0, nb_deq = 0, i; - - if (event_p_id < 0) + uint64_t *hopsv4, *hopsv6; + uint32_t *ipv4_arr; + uint16_t *hops; + uintptr_t mem; + + mem = (uintptr_t)rte_zmalloc( + "vector_fib", + (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + + sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + + (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * + evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (mem == 0) return; + ipv4_arr = (uint32_t *)mem; + type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; + hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; + hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; + hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; + ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; + + ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; + for (i = 0; i < evt_rsrc->vector_size; i++) + ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; + + if (event_p_id < 0) { + rte_free((void *)mem); + return; + } RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, rte_lcore_id()); @@ -519,10 +571,9 @@ 
fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - fib_process_event_vector(events[i].vec); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + fib_process_event_vector(events[i].vec, type_arr, + ipv6_arr, hopsv4, hopsv6, + ipv4_arr, hops); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free((void *)mem); } int __rte_noinline -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
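The worker above trades several large on-stack arrays for one rte_zmalloc() block sized by vector_size, carved into typed slices — this is what removes the stack overflow for large vectors. A sketch of that carving pattern with plain calloc(); the sketch orders slices widest type first so each slice stays naturally aligned, which is an illustrative choice of this sketch, not a claim about the patch's exact layout:

#include <stdint.h>
#include <stdlib.h>

struct scratch {
	uint64_t *hops64;
	uint32_t *ips;
	uint16_t *ports;
	uint8_t *types;
	void *raw;
};

/* One allocation, carved widest-type-first so every slice is aligned. */
static int
scratch_alloc(struct scratch *s, uint16_t vec_size)
{
	size_t sz = (sizeof(uint64_t) + sizeof(uint32_t) +
		     sizeof(uint16_t) + sizeof(uint8_t)) * vec_size;
	uint8_t *p = calloc(1, sz); /* rte_zmalloc() in the patch */

	if (p == NULL)
		return -1;
	s->raw = p;
	s->hops64 = (uint64_t *)p;
	p += sizeof(uint64_t) * vec_size;
	s->ips = (uint32_t *)p;
	p += sizeof(uint32_t) * vec_size;
	s->ports = (uint16_t *)p;
	p += sizeof(uint16_t) * vec_size;
	s->types = p;
	return 0;
}

int
main(void)
{
	struct scratch s;

	if (scratch_alloc(&s, 256) == 0) {
		s.hops64[0] = 1; /* use the slices as ordinary arrays */
		free(s.raw);
	}
	return 0;
}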
* RE: [EXT] [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib 2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-10-17 12:06 ` Shijith Thotton 0 siblings, 0 replies; 41+ messages in thread From: Shijith Thotton @ 2022-10-17 12:06 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran Cc: dev, Pavan Nikhilesh Bhagavatula > >Fix stack overflow when event vector size is greater than >MAX_BURST_SIZE. >Add missing mac swap and rfc1812 stage. > >Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> >--- > examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++----------- > 1 file changed, 91 insertions(+), 39 deletions(-) > >diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c >index b82e0c0354..edc0dd69b9 100644 >--- a/examples/l3fwd/l3fwd_fib.c >+++ b/examples/l3fwd/l3fwd_fib.c >@@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf, > */ > #if !defined FIB_SEND_MULTI > static inline void >-fib_send_single(int nb_tx, struct lcore_conf *qconf, >- struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) >+process_packet(struct rte_mbuf *pkt, uint16_t *hop) > { >- int32_t j; > struct rte_ether_hdr *eth_hdr; > >- for (j = 0; j < nb_tx; j++) { >- /* Run rfc1812 if packet is ipv4 and checks enabled. */ >+ /* Run rfc1812 if packet is ipv4 and checks enabled. */ > #if defined DO_RFC_1812_CHECKS >- rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( >- pkts_burst[j], struct rte_ether_hdr *) + 1), >- &hops[j], pkts_burst[j]->packet_type); >+ rfc1812_process( >+ (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( >+ pkt, struct rte_ether_hdr *) + >+ 1), >+ hop, pkt->packet_type); > #endif > >- /* Set MAC addresses. */ >- eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], >- struct rte_ether_hdr *); >- *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; >- rte_ether_addr_copy(&ports_eth_addr[hops[j]], >- ð_hdr->src_addr); >+ /* Set MAC addresses. */ >+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); >+ *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; >+ rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); >+} > >+static inline void >+fib_send_single(int nb_tx, struct lcore_conf *qconf, >+ struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) >+{ >+ int32_t j; >+ >+ for (j = 0; j < nb_tx; j++) { >+ process_packet(pkts_burst[j], &hops[j]); >+ if (hops[j] == BAD_PORT) { >+ rte_pktmbuf_free(pkts_burst[j]); >+ continue; >+ } > /* Send single packet. */ > send_single_packet(qconf, pkts_burst[j], hops[j]); > } >@@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, > uint32_t ipv4_arr[MAX_PKT_BURST]; > uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; > uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; >- uint16_t nh; >+ uint16_t nh, hops[MAX_PKT_BURST]; > uint8_t type_arr[MAX_PKT_BURST]; > uint32_t ipv4_cnt, ipv6_cnt; > uint32_t ipv4_arr_assem, ipv6_arr_assem; >@@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, > else > nh = (uint16_t)hopsv6[ipv6_arr_assem++]; > if (nh != FIB_DEFAULT_HOP) >- events[i].mbuf->port = nh; >+ hops[i] = nh != FIB_DEFAULT_HOP ? >+ nh : >+ events[i].mbuf->port; >+ process_packet(events[i].mbuf, &hops[i]); >+ events[i].mbuf->port = hops[i] != BAD_PORT ? 
>+ hops[i] : >+ events[i].mbuf->port; > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >@@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void >*dummy) > } > > static __rte_always_inline void >-fib_process_event_vector(struct rte_event_vector *vec) >+fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, >+ uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, >+ uint32_t *ipv4_arr, uint16_t *hops) > { >- uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; >- uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; > uint32_t ipv4_arr_assem, ipv6_arr_assem; > struct rte_mbuf **mbufs = vec->mbufs; >- uint32_t ipv4_arr[MAX_PKT_BURST]; >- uint8_t type_arr[MAX_PKT_BURST]; > uint32_t ipv4_cnt, ipv6_cnt; > struct lcore_conf *lconf; > uint16_t nh; >@@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector >*vec) > > /* Lookup IPv6 hops if IPv6 packets are present. */ > if (ipv6_cnt > 0) >- rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, >- hopsv6, ipv6_cnt); >- >- if (vec->attr_valid) { >- nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; >- if (nh != FIB_DEFAULT_HOP) >- vec->port = nh; >- else >- vec->attr_valid = 0; >- } >+ rte_fib6_lookup_bulk( >+ lconf->ipv6_lookup_struct, >+ (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, >+ ipv6_cnt); > > /* Assign ports looked up in fib depending on IPv4 or IPv6 */ > for (i = 0; i < vec->nb_elem; i++) { >@@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec) > else > nh = (uint16_t)hopsv6[ipv6_arr_assem++]; > if (nh != FIB_DEFAULT_HOP) >- mbufs[i]->port = nh; >- event_vector_attr_validate(vec, mbufs[i]); >+ hops[i] = nh; >+ else >+ hops[i] = vec->attr_valid ? vec->port : >+ vec->mbufs[i]->port; > } >+ >+#if defined FIB_SEND_MULTI >+ uint16_t k; >+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); >+ >+ for (i = 0; i != k; i += FWDSTEP) >+ processx4_step3(&vec->mbufs[i], &hops[i]); >+ for (; i < vec->nb_elem; i++) >+ process_packet(vec->mbufs[i], &hops[i]); >+#else >+ for (i = 0; i < vec->nb_elem; i++) >+ process_packet(vec->mbufs[i], &hops[i]); >+#endif >+ >+ process_event_vector(vec, hops); > } > > static __rte_always_inline void >@@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > const uint8_t event_d_id = evt_rsrc->event_d_id; > const uint16_t deq_len = evt_rsrc->deq_depth; > struct rte_event events[MAX_PKT_BURST]; >+ uint8_t *type_arr, **ipv6_arr, *ptr; > int nb_enq = 0, nb_deq = 0, i; >- >- if (event_p_id < 0) >+ uint64_t *hopsv4, *hopsv6; >+ uint32_t *ipv4_arr; >+ uint16_t *hops; >+ uintptr_t mem; >+ >+ mem = (uintptr_t)rte_zmalloc( >+ "vector_fib", >+ (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + >+ sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + >+ (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * >+ evt_rsrc->vector_size, >+ RTE_CACHE_LINE_SIZE); >+ if (mem == 0) > return; >+ ipv4_arr = (uint32_t *)mem; >+ type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; >+ hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; >+ hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; >+ hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; >+ ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; >+ >+ ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; >+ for (i = 0; i < evt_rsrc->vector_size; i++) >+ ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; >+ >+ if (event_p_id < 0) { >+ rte_free((void *)mem); >+ return; >+ } > > RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, > rte_lcore_id()); >@@ -519,10 +571,9 @@ 
fib_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > events[i].op = RTE_EVENT_OP_FORWARD; > } > >- fib_process_event_vector(events[i].vec); >- >- if (flags & L3FWD_EVENT_TX_DIRECT) >- event_vector_txq_set(events[i].vec, 0); >+ fib_process_event_vector(events[i].vec, type_arr, >+ ipv6_arr, hopsv4, hopsv6, >+ ipv4_arr, hops); > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >@@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > > l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, > nb_deq, 1); >+ rte_free((void *)mem); > } > > int __rte_noinline >-- >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
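Once hops[] is filled per packet, process_event_vector() — added in patch 3/5 and reused here — decides whether the vector keeps a single port attribute or degrades to per-mbuf ports. The control flow reduced to plain data; the struct and BAD_PORT below are stand-ins for the rte_event_vector API, not DPDK definitions:

#include <stdint.h>

#define BAD_PORT 0xFFFF /* placeholder */

struct vec_stub {
	uint16_t nb_elem;
	uint8_t attr_valid;
	uint16_t port;
	uint16_t per_pkt_port[32]; /* stands in for mbuf->port */
};

/* Return the common port or BAD_PORT, like process_dst_port(). */
static uint16_t
common_port(const uint16_t *hops, uint16_t n)
{
	uint16_t i;

	for (i = 0; i < n; i++)
		if (hops[i] != hops[0])
			return BAD_PORT;
	return hops[0];
}

static void
finalize_vector(struct vec_stub *v, const uint16_t *hops)
{
	uint16_t port = common_port(hops, v->nb_elem), i;

	if (port == BAD_PORT) {
		/* Mixed destinations: stamp each packet individually. */
		v->attr_valid = 0;
		for (i = 0; i < v->nb_elem; i++)
			v->per_pkt_port[i] = hops[i];
	} else {
		/* Uniform: one attribute covers the whole vector. */
		v->attr_valid = 1;
		v->port = port;
	}
}

int
main(void)
{
	struct vec_stub v = { .nb_elem = 4 };
	uint16_t hops[4] = { 2, 2, 2, 2 };

	finalize_vector(&v, hops);
	return v.attr_valid ? 0 : 1;
}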
* [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (2 preceding siblings ...) 2022-10-11 10:12 ` [PATCH v5 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-10-11 10:12 ` pbhagavatula 2022-10-12 8:57 ` [EXT] " Shijith Thotton 2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula 5 siblings, 1 reply; 41+ messages in thread From: pbhagavatula @ 2022-10-11 10:12 UTC (permalink / raw) To: jerinj; +Cc: dev, Pavan Nikhilesh From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use em vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- examples/l3fwd/l3fwd_em.c | 13 +++-- examples/l3fwd/l3fwd_em.h | 29 +++++------ examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- examples/l3fwd/l3fwd_event.h | 21 -------- 5 files changed, 48 insertions(+), 112 deletions(-) diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index a203dc9e46..35de31157e 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; unsigned int lcore_id; + uint16_t *dst_ports; if (event_p_id < 0) return; + dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_ports == NULL) + return; lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; @@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, } #if defined RTE_ARCH_X86 || defined __ARM_NEON - l3fwd_em_process_event_vector(events[i].vec, lconf); + l3fwd_em_process_event_vector(events[i].vec, lconf, + dst_ports); #else l3fwd_em_no_opt_process_event_vector(events[i].vec, - lconf); + lconf, dst_ports); #endif - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_ports); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h index fe2ee59f6a..7d051fc076 100644 --- a/examples/l3fwd/l3fwd_em.h +++ b/examples/l3fwd/l3fwd_em.h @@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid, } } -static __rte_always_inline void +static __rte_always_inline uint16_t l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) { struct rte_ether_hdr *eth_hdr; @@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); else m->port = BAD_PORT; + + return m->port; } /* @@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events, static inline void l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, + uint16_t *dst_ports) { struct rte_mbuf **mbufs = vec->mbufs; int32_t i; @@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); - /* Process first packet to init vector attributes */ - 
l3fwd_em_simple_process(mbufs[0], qconf); - if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; - } - /* * Prefetch and forward already prefetched packets. */ - for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { + for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { rte_prefetch0( rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *)); - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); } /* Forward remaining prefetched packets */ - for (; i < vec->nb_elem; i++) { - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); - } + for (; i < vec->nb_elem; i++) + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 12b997e477..2e11eefad7 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_port) { - struct rte_mbuf **mbufs = vec->mbufs; - uint16_t dst_port[MAX_PKT_BURST]; - int32_t i, j, n, pos; - - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) - rte_prefetch0( - rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); + uint16_t i; if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - - n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { - uint32_t pkt_type = - RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP; - uint32_t l3_type, tcp_or_udp; - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) - pkt_type &= mbufs[j + i]->packet_type; - - l3_type = pkt_type & RTE_PTYPE_L3_MASK; - tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - - for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; - i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; - i++, pos++) { - rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], - struct rte_ether_hdr *) + - 1); - } - - if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { - em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { - em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else { - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = - em_get_dst_port(qconf, mbufs[j + i], - mbufs[j + i]->port); - process_packet(mbufs[j + i], - &mbufs[j + i]->port); - event_vector_attr_validate(vec, mbufs[j + i]); - } - continue; - } - processx4_step3(&mbufs[j], &dst_port[j]); - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = dst_port[j + i]; - event_vector_attr_validate(vec, mbufs[j + i]); - } - } - - for (; j < vec->nb_elem; j++) { - mbufs[j]->port = - em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); - process_packet(mbufs[j], &mbufs[j]->port); - event_vector_attr_validate(vec, mbufs[j]); - } + l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port, + vec->port, qconf, 1); + else + for (i = 0; i < vec->nb_elem; i++) + l3fwd_em_process_packets(1, &vec->mbufs[i], + &dst_port[i], + vec->mbufs[i]->port, qconf, 1); + + process_event_vector(vec, dst_port); } #endif /* __L3FWD_EM_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sequential.h 
b/examples/l3fwd/l3fwd_em_sequential.h index d2f75edb8a..067f23889a 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events, for (i = 1, j = 0; j < nb_rx; i++, j++) { struct rte_mbuf *mbuf = events[j]->mbuf; + uint16_t port; if (i < nb_rx) { rte_prefetch0(rte_pktmbuf_mtod( events[i]->mbuf, struct rte_ether_hdr *) + 1); } + port = mbuf->port; mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); + if (mbuf->port == BAD_PORT) + mbuf->port = port; } } static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_ports) { + const uint8_t attr_valid = vec->attr_valid; struct rte_mbuf **mbufs = vec->mbufs; int32_t i, j; rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); - if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { if (j < vec->nb_elem) rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); - mbufs[i]->port = - em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); - process_packet(mbufs[i], &mbufs[i]->port); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = em_get_dst_port(qconf, mbufs[i], + attr_valid ? vec->port : + mbufs[i]->port); } + j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != j; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &dst_ports[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &dst_ports[i]); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index 3fe38aada0..e21817c36b 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) } #endif -static inline void -event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) -{ - /* l3fwd application only changes mbuf port while processing */ - if (vec->attr_valid && (vec->port != mbuf->port)) - vec->attr_valid = 0; -} - -static inline void -event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) -{ - if (vec->attr_valid) { - vec->queue = txq; - } else { - int i; - - for (i = 0; i < vec->nb_elem; i++) - rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); - } -} - static inline uint16_t filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, uint16_t nb_pkts) -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
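The attr_valid dispatch in the quoted l3fwd_em_process_event_vector() is worth spelling out: a valid vector attribute means every packet shares vec->port as its input port, so one bulk call suffices; otherwise each mbuf carries its own input port and the code falls back to per-packet calls. A data-only sketch of that dispatch, with process_bulk() standing in for l3fwd_em_process_packets():

#include <stdint.h>

/* Stand-in for l3fwd_em_process_packets(): all pkts share in_port. */
static void
process_bulk(const uint16_t *in_idx, uint16_t *out, uint16_t n,
	     uint16_t in_port)
{
	uint16_t i;

	for (i = 0; i < n; i++)
		out[i] = (uint16_t)(in_port + in_idx[i]); /* fake lookup */
}

/*
 * Dispatch mirroring the event-vector path: a valid vector attribute
 * allows one bulk call; otherwise per-packet calls honour each
 * packet's own input port.
 */
static void
dispatch(const uint16_t *in_idx, const uint16_t *pkt_port, uint16_t *out,
	 uint16_t n, int attr_valid, uint16_t vec_port)
{
	uint16_t i;

	if (attr_valid)
		process_bulk(in_idx, out, n, vec_port);
	else
		for (i = 0; i < n; i++)
			process_bulk(&in_idx[i], &out[i], 1, pkt_port[i]);
}

int
main(void)
{
	uint16_t idx[4] = { 0, 1, 2, 3 };
	uint16_t pp[4] = { 5, 5, 6, 6 };
	uint16_t out[4];

	dispatch(idx, pp, out, 4, 0, 0);
	return out[3] == 9 ? 0 : 1;
}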
* RE: [EXT] [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector 2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-10-12 8:57 ` Shijith Thotton 0 siblings, 0 replies; 41+ messages in thread From: Shijith Thotton @ 2022-10-12 8:57 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran Cc: dev, Pavan Nikhilesh Bhagavatula >From: Pavan Nikhilesh <pbhagavatula@marvell.com> > >Use em vector path to process event vector. > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> >--- > examples/l3fwd/l3fwd_em.c | 13 +++-- > examples/l3fwd/l3fwd_em.h | 29 +++++------ > examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- > examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- > examples/l3fwd/l3fwd_event.h | 21 -------- > 5 files changed, 48 insertions(+), 112 deletions(-) > >diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c >index a203dc9e46..35de31157e 100644 >--- a/examples/l3fwd/l3fwd_em.c >+++ b/examples/l3fwd/l3fwd_em.c >@@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > int i, nb_enq = 0, nb_deq = 0; > struct lcore_conf *lconf; > unsigned int lcore_id; >+ uint16_t *dst_ports; > > if (event_p_id < 0) > return; > >+ dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, >+ RTE_CACHE_LINE_SIZE); >+ if (dst_ports == NULL) >+ return; > lcore_id = rte_lcore_id(); > lconf = &lcore_conf[lcore_id]; > >@@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > } > > #if defined RTE_ARCH_X86 || defined __ARM_NEON >- l3fwd_em_process_event_vector(events[i].vec, lconf); >+ l3fwd_em_process_event_vector(events[i].vec, lconf, >+ dst_ports); > #else > l3fwd_em_no_opt_process_event_vector(events[i].vec, >- lconf); >+ lconf, dst_ports); > #endif >- if (flags & L3FWD_EVENT_TX_DIRECT) >- event_vector_txq_set(events[i].vec, 0); > } > > if (flags & L3FWD_EVENT_TX_ENQ) { >@@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources >*evt_rsrc, > > l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, > nb_deq, 1); >+ rte_free(dst_ports); > } > > int __rte_noinline >diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h >index fe2ee59f6a..7d051fc076 100644 >--- a/examples/l3fwd/l3fwd_em.h >+++ b/examples/l3fwd/l3fwd_em.h >@@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t >portid, > } > } > >-static __rte_always_inline void >+static __rte_always_inline uint16_t > l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) > { > struct rte_ether_hdr *eth_hdr; >@@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct >lcore_conf *qconf) > m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); > else > m->port = BAD_PORT; >+ >+ return m->port; > } > > /* >@@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct >rte_event **events, > > static inline void > l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, >- struct lcore_conf *qconf) >+ struct lcore_conf *qconf, >+ uint16_t *dst_ports) > { > struct rte_mbuf **mbufs = vec->mbufs; > int32_t i; >@@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct >rte_event_vector *vec, > for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) > rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); > >- /* Process first packet to init vector attributes */ >- l3fwd_em_simple_process(mbufs[0], qconf); >- if (vec->attr_valid) { >- if (mbufs[0]->port != BAD_PORT) >- 
vec->port = mbufs[0]->port; >- else >- vec->attr_valid = 0; >- } >- > /* > * Prefetch and forward already prefetched packets. > */ >- for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { >+ for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { > rte_prefetch0( > rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void >*)); >- l3fwd_em_simple_process(mbufs[i], qconf); >- event_vector_attr_validate(vec, mbufs[i]); >+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); > } > > /* Forward remaining prefetched packets */ >- for (; i < vec->nb_elem; i++) { >- l3fwd_em_simple_process(mbufs[i], qconf); >- event_vector_attr_validate(vec, mbufs[i]); >- } >+ for (; i < vec->nb_elem; i++) >+ dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); >+ >+ process_event_vector(vec, dst_ports); > } > > #endif /* __L3FWD_EM_H__ */ >diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h >index 12b997e477..2e11eefad7 100644 >--- a/examples/l3fwd/l3fwd_em_hlm.h >+++ b/examples/l3fwd/l3fwd_em_hlm.h >@@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event >**ev, > > static inline void > l3fwd_em_process_event_vector(struct rte_event_vector *vec, >- struct lcore_conf *qconf) >+ struct lcore_conf *qconf, uint16_t *dst_port) > { >- struct rte_mbuf **mbufs = vec->mbufs; >- uint16_t dst_port[MAX_PKT_BURST]; >- int32_t i, j, n, pos; >- >- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) >- rte_prefetch0( >- rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); >+ uint16_t i; > > if (vec->attr_valid) >- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); >- >- n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); >- for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { >- uint32_t pkt_type = >- RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | >RTE_PTYPE_L4_UDP; >- uint32_t l3_type, tcp_or_udp; >- >- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) >- pkt_type &= mbufs[j + i]->packet_type; >- >- l3_type = pkt_type & RTE_PTYPE_L3_MASK; >- tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | >RTE_PTYPE_L4_UDP); >- >- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; >- i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; >- i++, pos++) { >- rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], >- struct rte_ether_hdr *) + >- 1); >- } >- >- if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { >- em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], >- &dst_port[j]); >- } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { >- em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], >- &dst_port[j]); >- } else { >- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { >- mbufs[j + i]->port = >- em_get_dst_port(qconf, mbufs[j + i], >- mbufs[j + i]->port); >- process_packet(mbufs[j + i], >- &mbufs[j + i]->port); >- event_vector_attr_validate(vec, mbufs[j + i]); >- } >- continue; >- } >- processx4_step3(&mbufs[j], &dst_port[j]); >- >- for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { >- mbufs[j + i]->port = dst_port[j + i]; >- event_vector_attr_validate(vec, mbufs[j + i]); >- } >- } >- >- for (; j < vec->nb_elem; j++) { >- mbufs[j]->port = >- em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); >- process_packet(mbufs[j], &mbufs[j]->port); >- event_vector_attr_validate(vec, mbufs[j]); >- } >+ l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, >dst_port, >+ vec->port, qconf, 1); >+ else >+ for (i = 0; i < vec->nb_elem; i++) >+ l3fwd_em_process_packets(1, &vec->mbufs[i], >+ &dst_port[i], >+ vec->mbufs[i]->port, qconf, 1); >+ >+ process_event_vector(vec, dst_port); > } > > #endif /* __L3FWD_EM_HLM_H__ */ >diff --git 
a/examples/l3fwd/l3fwd_em_sequential.h >b/examples/l3fwd/l3fwd_em_sequential.h >index d2f75edb8a..067f23889a 100644 >--- a/examples/l3fwd/l3fwd_em_sequential.h >+++ b/examples/l3fwd/l3fwd_em_sequential.h >@@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event >**events, > > for (i = 1, j = 0; j < nb_rx; i++, j++) { > struct rte_mbuf *mbuf = events[j]->mbuf; >+ uint16_t port; > > if (i < nb_rx) { > rte_prefetch0(rte_pktmbuf_mtod( > events[i]->mbuf, > struct rte_ether_hdr *) + 1); > } >+ port = mbuf->port; > mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); > process_packet(mbuf, &mbuf->port); >+ if (mbuf->port == BAD_PORT) >+ mbuf->port = port; > } > } > > static inline void > l3fwd_em_process_event_vector(struct rte_event_vector *vec, >- struct lcore_conf *qconf) >+ struct lcore_conf *qconf, uint16_t *dst_ports) > { >+ const uint8_t attr_valid = vec->attr_valid; > struct rte_mbuf **mbufs = vec->mbufs; > int32_t i, j; > > rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); > >- if (vec->attr_valid) >- vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); >- > for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { > if (j < vec->nb_elem) > rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], > struct rte_ether_hdr *) + > 1); >- mbufs[i]->port = >- em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); >- process_packet(mbufs[i], &mbufs[i]->port); >- event_vector_attr_validate(vec, mbufs[i]); >+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i], >+ attr_valid ? vec->port : >+ mbufs[i]->port); > } >+ j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); >+ >+ for (i = 0; i != j; i += FWDSTEP) >+ processx4_step3(&vec->mbufs[i], &dst_ports[i]); >+ for (; i < vec->nb_elem; i++) >+ process_packet(vec->mbufs[i], &dst_ports[i]); >+ >+ process_event_vector(vec, dst_ports); > } > > #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ >diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h >index 3fe38aada0..e21817c36b 100644 >--- a/examples/l3fwd/l3fwd_event.h >+++ b/examples/l3fwd/l3fwd_event.h >@@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t >nb_elem) > } > #endif > >-static inline void >-event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf >*mbuf) >-{ >- /* l3fwd application only changes mbuf port while processing */ >- if (vec->attr_valid && (vec->port != mbuf->port)) >- vec->attr_valid = 0; >-} >- >-static inline void >-event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) >-{ >- if (vec->attr_valid) { >- vec->queue = txq; >- } else { >- int i; >- >- for (i = 0; i < vec->nb_elem; i++) >- rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); >- } >-} >- > static inline uint16_t > filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, > uint16_t nb_pkts) >-- >2.25.1 For the series: Acked-by: Shijith Thotton <sthotton@marvell.com> ^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (3 preceding siblings ...) 2022-10-11 10:12 ` [PATCH v5 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-10-17 12:05 ` Shijith Thotton 2022-10-20 16:15 ` Pavan Nikhilesh Bhagavatula 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula 5 siblings, 1 reply; 41+ messages in thread From: Shijith Thotton @ 2022-10-17 12:05 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, David Christensen Cc: dev, Pavan Nikhilesh Bhagavatula, stable > >Fix port group mask generation in altivec, vec_any_eq returns >0 or 1 while port_groupx4 expects comparison mask result. > >Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") >Cc: stable@dpdk.org > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> >--- > v5 Changes: > - Fix compilation errors. > > v4 Changes: > - Fix missing `rte_free`. > > v3 Changes: > - PPC optimize port mask generation. > - Fix aarch32 compilation. > > v2 Changes: > - Fix PPC, RISC-V, aarch32 compilation. > > examples/common/altivec/port_group.h | 11 ++++++++--- > 1 file changed, 8 insertions(+), 3 deletions(-) > >diff --git a/examples/common/altivec/port_group.h >b/examples/common/altivec/port_group.h >index 5e209b02fa..1c05bc025a 100644 >--- a/examples/common/altivec/port_group.h >+++ b/examples/common/altivec/port_group.h >@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, > uint16_t u16[FWDSTEP + 1]; > uint64_t u64; > } *pnum = (void *)pn; >- >+ __vector unsigned long long result; >+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080, >+ 0x80808080, 0x80808080}; > int32_t v; > >- v = vec_any_eq(dp1, dp2); >- >+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); >+ dp1 = vec_mergeh(dp1, dp1); >+ result = (__vector unsigned long long)vec_vbpermq( >+ (__vector unsigned char)dp1, (__vector unsigned >char)perm_mask); > >+ v = result[1]; > /* update last port counter. */ > lp[0] += gptbl[v].lpv; > >-- >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
* RE: [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation 2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton @ 2022-10-20 16:15 ` Pavan Nikhilesh Bhagavatula 0 siblings, 0 replies; 41+ messages in thread From: Pavan Nikhilesh Bhagavatula @ 2022-10-20 16:15 UTC (permalink / raw) To: Shijith Thotton, Jerin Jacob Kollanukkaran, David Christensen; +Cc: dev, stable > -----Original Message----- > From: Shijith Thotton <sthotton@marvell.com> > Sent: Monday, October 17, 2022 5:36 PM > To: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>; Jerin Jacob > Kollanukkaran <jerinj@marvell.com>; David Christensen > <drc@linux.vnet.ibm.com> > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula > <pbhagavatula@marvell.com>; stable@dpdk.org > Subject: RE: [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask > generation > > > > >Fix port group mask generation in altivec, vec_any_eq returns > >0 or 1 while port_groupx4 expects comparison mask result. > > > >Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on > powerpc") > >Cc: stable@dpdk.org > > > >Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Acked-by: Shijith Thotton <sthotton@marvell.com> > Thomas, Will this series make it into 22.11 release? > >--- > > v5 Changes: > > - Fix compilation errors. > > > > v4 Changes: > > - Fix missing `rte_free`. > > > > v3 Changes: > > - PPC optimize port mask generation. > > - Fix aarch32 compilation. > > > > v2 Changes: > > - Fix PPC, RISC-V, aarch32 compilation. > > > > examples/common/altivec/port_group.h | 11 ++++++++--- > > 1 file changed, 8 insertions(+), 3 deletions(-) > > > >diff --git a/examples/common/altivec/port_group.h > >b/examples/common/altivec/port_group.h > >index 5e209b02fa..1c05bc025a 100644 > >--- a/examples/common/altivec/port_group.h > >+++ b/examples/common/altivec/port_group.h > >@@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t > *lp, > > uint16_t u16[FWDSTEP + 1]; > > uint64_t u64; > > } *pnum = (void *)pn; > >- > >+ __vector unsigned long long result; > >+ const __vector unsigned int perm_mask = {0x00204060, 0x80808080, > >+ 0x80808080, 0x80808080}; > > int32_t v; > > > >- v = vec_any_eq(dp1, dp2); > >- > >+ dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); > >+ dp1 = vec_mergeh(dp1, dp1); > >+ result = (__vector unsigned long long)vec_vbpermq( > >+ (__vector unsigned char)dp1, (__vector unsigned > >char)perm_mask); > > > >+ v = result[1]; > > /* update last port counter. */ > > lp[0] += gptbl[v].lpv; > > > >-- > >2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH v6 1/5] examples/l3fwd: fix port group mask generation 2022-10-11 10:12 ` [PATCH v5 1/5] examples/l3fwd: fix port group mask generation pbhagavatula ` (4 preceding siblings ...) 2022-10-17 12:05 ` [EXT] [PATCH v5 1/5] examples/l3fwd: fix port group mask generation Shijith Thotton @ 2022-10-25 16:05 ` pbhagavatula 2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula ` (4 more replies) 5 siblings, 5 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw) To: jerinj, thomas, David Christensen Cc: dev, Pavan Nikhilesh, stable, Shijith Thotton From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix port group mask generation in altivec, vec_any_eq returns 0 or 1 while port_groupx4 expects comparison mask result. Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> --- v6 Changes: - Minor optimization to process_dst_port NEON. v5 Changes: - Fix compilation errors. v4 Changes: - Fix missing `rte_free`. v3 Changes: - PPC optimize port mask generation. - Fix aarch32 compilation. v2 Changes: - Fix PPC, RISC-V, aarch32 compilation. examples/common/altivec/port_group.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/common/altivec/port_group.h b/examples/common/altivec/port_group.h index 5e209b02fa..1c05bc025a 100644 --- a/examples/common/altivec/port_group.h +++ b/examples/common/altivec/port_group.h @@ -26,12 +26,17 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16_t u16[FWDSTEP + 1]; uint64_t u64; } *pnum = (void *)pn; - + __vector unsigned long long result; + const __vector unsigned int perm_mask = {0x00204060, 0x80808080, + 0x80808080, 0x80808080}; int32_t v; - v = vec_any_eq(dp1, dp2); - + dp1 = (__vector unsigned short)vec_cmpeq(dp1, dp2); + dp1 = vec_mergeh(dp1, dp1); + result = (__vector unsigned long long)vec_vbpermq( + (__vector unsigned char)dp1, (__vector unsigned char)perm_mask); + v = result[1]; /* update last port counter. */ lp[0] += gptbl[v].lpv; -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
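For readers who want the semantics of this fix without the AltiVec details: port_groupx4() compares runs of destination ports offset by one entry and consumes a 4-bit lane mask — bit i set when the lanes match — which indexes gptbl[]. vec_any_eq() instead collapses the whole comparison to a single 0/1 "any lane equal" answer, so the table index was wrong. A minimal scalar model of the value the fixed sequence must produce (the function name is mine; this is an illustrative sketch, not code from the patch):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of the gptbl[] index the fixed AltiVec code computes:
 * one bit per equal lane, not a collapsed 0/1 flag. */
static uint32_t
port_cmp_mask(const uint16_t dp1[4], const uint16_t dp2[4])
{
	uint32_t v = 0;

	for (int i = 0; i < 4; i++)
		v |= (uint32_t)(dp1[i] == dp2[i]) << i;
	return v;	/* 0x0-0xF, a valid gptbl[] index */
}

int
main(void)
{
	/* Destination ports for FWDSTEP + 1 packets; dp1/dp2 are the
	 * same array offset by one, so equal bits mark equal runs. */
	const uint16_t pn[5] = {5, 5, 7, 7, 9};

	printf("mask=0x%x\n", port_cmp_mask(&pn[0], &pn[1]));	/* 0x5 */
	return 0;
}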
* [PATCH v6 2/5] examples/l3fwd: split processing and send stages 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula @ 2022-10-25 16:05 ` pbhagavatula 2022-10-25 16:05 ` [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula ` (3 subsequent siblings) 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw) To: jerinj, thomas, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh, Shijith Thotton From: Pavan Nikhilesh <pbhagavatula@marvell.com> Split packet processing from packet send stage, as send stage is not common for poll and event mode. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> --- examples/l3fwd/l3fwd_em_hlm.h | 39 +++++++++++++++++++----------- examples/l3fwd/l3fwd_lpm_altivec.h | 25 ++++++++++++++++--- examples/l3fwd/l3fwd_lpm_neon.h | 35 ++++++++++++++++++++------- examples/l3fwd/l3fwd_lpm_sse.h | 25 ++++++++++++++++--- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index e76f2760b0..12b997e477 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -177,16 +177,12 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, return portid; } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. - */ static inline void -l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t *dst_port, uint16_t portid, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i, j, pos; - uint16_t dst_port[MAX_PKT_BURST]; /* * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets @@ -233,13 +229,30 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, dst_port[j + i] = em_get_dst_port(qconf, pkts_burst[j + i], portid); } + + for (i = 0; i < EM_HASH_LOOKUP_COUNT && do_step3; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); } - for (; j < nb_rx; j++) + for (; j < nb_rx; j++) { dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &pkts_burst[j]->port); + } +} - send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); +/* + * Buffer optimized handling of packets, invoked + * from main_loop. 
+ */ +static inline void +l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_em_process_packets(nb_rx, pkts_burst, dst_port, portid, qconf, 0); + send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } /* @@ -260,11 +273,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, */ int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { + for (j = 0; j < nb_rx; j++) pkts_burst[j] = ev[j]->mbuf; - rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *) + 1); - } for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { @@ -305,7 +315,8 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, } continue; } - processx4_step3(&pkts_burst[j], &dst_port[j]); + for (i = 0; i < EM_HASH_LOOKUP_COUNT; i += FWDSTEP) + processx4_step3(&pkts_burst[j + i], &dst_port[j + i]); for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) pkts_burst[j + i]->port = dst_port[j + i]; diff --git a/examples/l3fwd/l3fwd_lpm_altivec.h b/examples/l3fwd/l3fwd_lpm_altivec.h index 0c6852a7bb..adb82f1478 100644 --- a/examples/l3fwd/l3fwd_lpm_altivec.h +++ b/examples/l3fwd/l3fwd_lpm_altivec.h @@ -96,11 +96,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint8_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint8_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __vector unsigned int dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -114,22 +114,41 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h index 78ee83b76c..2a68c4c15e 100644 --- a/examples/l3fwd/l3fwd_lpm_neon.h +++ b/examples/l3fwd/l3fwd_lpm_neon.h @@ -80,16 +80,12 @@ processx4_step2(const struct lcore_conf *qconf, } } -/* - * Buffer optimized handling of packets, invoked - * from main_loop. 
- */ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t i = 0, j = 0; - uint16_t dst_port[MAX_PKT_BURST]; int32x4_t dip; uint32_t ipv4_flag; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -100,7 +96,6 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *)); } - for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { for (i = 0; i < FWDSTEP; i++) { rte_prefetch0(rte_pktmbuf_mtod( @@ -111,11 +106,15 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); } processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + processx4_step3(&pkts_burst[j], &dst_port[j]); j += FWDSTEP; } @@ -138,26 +137,44 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, void *)); j++; } - j -= m; /* Classify last up to 3 packets one by one */ switch (m) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fallthrough */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); } } +} + +/* + * Buffer optimized handling of packets, invoked + * from main_loop. + */ +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h index 3f637a23d1..db15030320 100644 --- a/examples/l3fwd/l3fwd_lpm_sse.h +++ b/examples/l3fwd/l3fwd_lpm_sse.h @@ -82,11 +82,11 @@ processx4_step2(const struct lcore_conf *qconf, * from main_loop. 
*/ static inline void -l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, - uint16_t portid, struct lcore_conf *qconf) +l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst, + uint16_t portid, uint16_t *dst_port, + struct lcore_conf *qconf, const uint8_t do_step3) { int32_t j; - uint16_t dst_port[MAX_PKT_BURST]; __m128i dip[MAX_PKT_BURST / FWDSTEP]; uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); @@ -99,21 +99,40 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, processx4_step2(qconf, dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); + if (do_step3) + for (j = 0; j != k; j += FWDSTEP) + processx4_step3(&pkts_burst[j], &dst_port[j]); + /* Classify last up to 3 packets one by one */ switch (nb_rx % FWDSTEP) { case 3: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 2: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; /* fall-through */ case 1: dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); + if (do_step3) + process_packet(pkts_burst[j], &dst_port[j]); j++; } +} + +static inline void +l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint16_t portid, + struct lcore_conf *qconf) +{ + uint16_t dst_port[MAX_PKT_BURST]; + l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf, + 0); send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); } -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
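A note on the recurring do_step3 parameter introduced by this patch: it is passed as a constant at every call site, so after inlining the compiler can specialise the body and drop the dead branch — poll mode passes 0 and keeps the header rewrite in its send stage, event mode passes 1 and gets the rewrite folded into processing. A stripped-down model of the pattern (helper names are mine; this is a sketch, not the patch):

#include <stdint.h>
#include <stdio.h>

static unsigned int rewrites;	/* counts header-rewrite invocations */

/* Stand-in for processx4_step3()/process_packet(): the MAC swap and
 * rfc1812 step that only the event path wants done at this point. */
static inline void
rewrite_step(uint16_t *dst_port)
{
	(void)dst_port;
	rewrites++;
}

/* Model of l3fwd_*_process_packets(): do_step3 is a const flag, so
 * each literal call site instantiates a branch-free specialisation. */
static inline void
classify_burst(uint16_t *dst_port, int nb_rx, const uint8_t do_step3)
{
	for (int i = 0; i < nb_rx; i++) {
		dst_port[i] = (uint16_t)i;	/* fake lookup result */
		if (do_step3)
			rewrite_step(&dst_port[i]);
	}
}

int
main(void)
{
	uint16_t dp[4];

	classify_burst(dp, 4, 0);	/* poll mode: send stage rewrites */
	classify_burst(dp, 4, 1);	/* event mode: rewrite done here */
	printf("rewrites=%u\n", rewrites);	/* prints rewrites=4 */
	return 0;
}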
* [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula 2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula @ 2022-10-25 16:05 ` pbhagavatula 2022-10-25 16:05 ` [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula ` (2 subsequent siblings) 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw) To: jerinj, thomas, David Christensen, Ruifeng Wang, Bruce Richardson, Konstantin Ananyev Cc: dev, Pavan Nikhilesh, Shijith Thotton From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use lpm vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> --- examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++ examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++ examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++-------- examples/l3fwd/l3fwd_neon.h | 48 +++++++++++++++++++++++ examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++ 5 files changed, 215 insertions(+), 16 deletions(-) diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h index 87018f5dbe..e45e138e59 100644 --- a/examples/l3fwd/l3fwd_altivec.h +++ b/examples/l3fwd/l3fwd_altivec.h @@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __vector unsigned short dp1; + __vector unsigned short dp; + + dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); + dp1 = *((__vector unsigned short *)&dst_ports[i]); + res = vec_all_eq(dp1, dp); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_ALTIVEC_H_ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index b93841a16f..3fe38aada0 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -82,6 +82,27 @@ struct l3fwd_event_resources { uint64_t vector_tmo_ns; }; +#if defined(RTE_ARCH_X86) +#include "l3fwd_sse.h" +#elif defined __ARM_NEON +#include "l3fwd_neon.h" +#elif defined(RTE_ARCH_PPC_64) +#include "l3fwd_altivec.h" +#else +static inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + int i; + + for (i = 0; i < nb_elem; i++) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + } + + return dst_ports[0]; +} +#endif + static inline void event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) { @@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) } } +static inline uint16_t +filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, + uint16_t nb_pkts) +{ + uint16_t *des_pos, free = 0; + struct rte_mbuf **pos; + int i; + + /* Filter out and free bad packets */ + for (i = 0; i < nb_pkts; i++) { + if (dst_port[i] == BAD_PORT) { + rte_pktmbuf_free(mbufs[i]); + if (!free) { + pos = &mbufs[i]; + des_pos = &dst_port[i]; + } + free++; + continue; + } + + if (free) { + *pos = mbufs[i]; + pos++; + *des_pos = dst_port[i]; + des_pos++; + } + } + return nb_pkts - free; +} + +static inline void +process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port) +{ + uint16_t port, i; + + vec->nb_elem = filter_bad_packets(vec->mbufs, 
dst_port, vec->nb_elem); + /* Verify destination array */ + port = process_dst_port(dst_port, vec->nb_elem); + if (port == BAD_PORT) { + vec->attr_valid = 0; + for (i = 0; i < vec->nb_elem; i++) { + vec->mbufs[i]->port = dst_port[i]; + rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0); + } + } else { + vec->attr_valid = 1; + vec->port = port; + vec->queue = 0; + } +} struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void); void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf); diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 22d7f61a42..5172979c72 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf) +lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf, + uint16_t *dst_port) { struct rte_mbuf **mbufs = vec->mbufs; int i; - /* Process first packet to init vector attributes */ - lpm_process_event_pkt(lconf, mbufs[0]); +#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64 if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; + l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port, + dst_port, lconf, 1); + } else { + for (i = 0; i < vec->nb_elem; i++) + l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port, + &dst_port[i], lconf, 1); } +#else + for (i = 0; i < vec->nb_elem; i++) + dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]); +#endif - for (i = 1; i < vec->nb_elem; i++) { - lpm_process_event_pkt(lconf, mbufs[i]); - event_vector_attr_validate(vec, mbufs[i]); - } + process_event_vector(vec, dst_port); } /* Same eventdev loop for single and burst of vector */ @@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, struct rte_event events[MAX_PKT_BURST]; int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; + uint16_t *dst_port_list; unsigned int lcore_id; if (event_p_id < 0) @@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; - + dst_port_list = + rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_port_list == NULL) + return; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - lpm_process_event_vector(events[i].vec, lconf); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + lpm_process_event_vector(events[i].vec, lconf, + dst_port_list); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_port_list); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h index ce515e0bc4..40807d5965 100644 --- a/examples/l3fwd/l3fwd_neon.h +++ b/examples/l3fwd/l3fwd_neon.h @@ -194,4 +194,52 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0; + +#if defined(RTE_ARCH_ARM64) + uint64_t res; + + while (nb_elem > 7) { + uint16x8_t dp = vdupq_n_u16(dst_ports[0]); 
+ uint16x8_t dp1; + + dp1 = vld1q_u16(&dst_ports[i]); + dp1 = vceqq_u16(dp1, dp); + res = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(dp1, 4)), + 0); + if (res != ~0ULL) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + uint16x4_t dp = vdup_n_u16(dst_ports[0]); + uint16x4_t dp1; + + dp1 = vld1_u16(&dst_ports[i]); + dp1 = vceq_u16(dp1, dp); + res = vget_lane_u64(vreinterpret_u64_u16(dp1), 0); + if (res != ~0ULL) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } +#endif + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_NEON_H_ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index 0f0d0323a2..083729cdef 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + res = _mm_movemask_epi8(dp1); + if (res != 0xFFFF) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + res = _mm_movemask_ps((__m128)dp1); + if (res != 0xF) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_SSE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
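One non-obvious trick in the NEON variant of process_dst_port() above: vshrn_n_u16(dp1, 4) shifts each 16-bit compare result right by 4 and narrows it to 8 bits, so the eight 0xFFFF/0x0000 lanes pack into one 64-bit value made of 0xFF/0x00 bytes, and the all-lanes-equal test collapses to a single res != ~0ULL check. The same computation in scalar form (a sketch, not from the patch; byte order differs from the NEON lane layout but the all-ones test does not depend on it):

#include <stdint.h>
#include <stdio.h>

/* Scalar equivalent of vshrn_n_u16(vceqq_u16(...), 4) followed by the
 * 64-bit reinterpret: eq[i] is 0xFFFF (lane equal) or 0x0000. */
static uint64_t
pack_cmp_mask(const uint16_t eq[8])
{
	uint64_t res = 0;

	for (int i = 0; i < 8; i++)
		res |= (uint64_t)(uint8_t)(eq[i] >> 4) << (8 * i);
	return res;	/* ~0ULL iff all eight lanes compared equal */
}

int
main(void)
{
	const uint16_t all[8] = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
				 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF};
	const uint16_t one_off[8] = {0xFFFF, 0xFFFF, 0, 0xFFFF,
				     0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF};

	printf("%d %d\n", pack_cmp_mask(all) == ~0ULL,
	       pack_cmp_mask(one_off) == ~0ULL);	/* prints 1 0 */
	return 0;
}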
* [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula 2022-10-25 16:05 ` [PATCH v6 2/5] examples/l3fwd: split processing and send stages pbhagavatula 2022-10-25 16:05 ` [PATCH v6 3/5] examples/l3fwd: use lpm vector path for event vector pbhagavatula @ 2022-10-25 16:05 ` pbhagavatula 2022-10-25 16:05 ` [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula 2022-10-31 14:52 ` [PATCH v6 1/5] examples/l3fwd: fix port group mask generation Thomas Monjalon 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw) To: jerinj, thomas; +Cc: dev, Pavan Nikhilesh, Shijith Thotton From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix stack overflow when event vector size is greater than MAX_BURST_SIZE. Add missing mac swap and rfc1812 stage. Fixes: e8adca1951d4 ("examples/l3fwd: support event vector") Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> --- examples/l3fwd/l3fwd_fib.c | 130 ++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 39 deletions(-) diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c index b82e0c0354..edc0dd69b9 100644 --- a/examples/l3fwd/l3fwd_fib.c +++ b/examples/l3fwd/l3fwd_fib.c @@ -77,27 +77,37 @@ fib_parse_packet(struct rte_mbuf *mbuf, */ #if !defined FIB_SEND_MULTI static inline void -fib_send_single(int nb_tx, struct lcore_conf *qconf, - struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +process_packet(struct rte_mbuf *pkt, uint16_t *hop) { - int32_t j; struct rte_ether_hdr *eth_hdr; - for (j = 0; j < nb_tx; j++) { - /* Run rfc1812 if packet is ipv4 and checks enabled. */ + /* Run rfc1812 if packet is ipv4 and checks enabled. */ #if defined DO_RFC_1812_CHECKS - rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( - pkts_burst[j], struct rte_ether_hdr *) + 1), - &hops[j], pkts_burst[j]->packet_type); + rfc1812_process( + (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod( + pkt, struct rte_ether_hdr *) + + 1), + hop, pkt->packet_type); #endif - /* Set MAC addresses. */ - eth_hdr = rte_pktmbuf_mtod(pkts_burst[j], - struct rte_ether_hdr *); - *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[hops[j]]; - rte_ether_addr_copy(&ports_eth_addr[hops[j]], - ð_hdr->src_addr); + /* Set MAC addresses. */ + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + *(uint64_t *)ð_hdr->dst_addr = dest_eth_addr[*hop]; + rte_ether_addr_copy(&ports_eth_addr[*hop], ð_hdr->src_addr); +} +static inline void +fib_send_single(int nb_tx, struct lcore_conf *qconf, + struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx]) +{ + int32_t j; + + for (j = 0; j < nb_tx; j++) { + process_packet(pkts_burst[j], &hops[j]); + if (hops[j] == BAD_PORT) { + rte_pktmbuf_free(pkts_burst[j]); + continue; + } /* Send single packet. */ send_single_packet(qconf, pkts_burst[j], hops[j]); } @@ -261,7 +271,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, uint32_t ipv4_arr[MAX_PKT_BURST]; uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; - uint16_t nh; + uint16_t nh, hops[MAX_PKT_BURST]; uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; uint32_t ipv4_arr_assem, ipv6_arr_assem; @@ -350,7 +360,13 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - events[i].mbuf->port = nh; + hops[i] = nh != FIB_DEFAULT_HOP ? 
+ nh : + events[i].mbuf->port; + process_packet(events[i].mbuf, &hops[i]); + events[i].mbuf->port = hops[i] != BAD_PORT ? + hops[i] : + events[i].mbuf->port; } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -418,14 +434,12 @@ fib_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -fib_process_event_vector(struct rte_event_vector *vec) +fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, + uint8_t **ipv6_arr, uint64_t *hopsv4, uint64_t *hopsv6, + uint32_t *ipv4_arr, uint16_t *hops) { - uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE]; - uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST]; uint32_t ipv4_arr_assem, ipv6_arr_assem; struct rte_mbuf **mbufs = vec->mbufs; - uint32_t ipv4_arr[MAX_PKT_BURST]; - uint8_t type_arr[MAX_PKT_BURST]; uint32_t ipv4_cnt, ipv6_cnt; struct lcore_conf *lconf; uint16_t nh; @@ -463,16 +477,10 @@ fib_process_event_vector(struct rte_event_vector *vec) /* Lookup IPv6 hops if IPv6 packets are present. */ if (ipv6_cnt > 0) - rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct, ipv6_arr, - hopsv6, ipv6_cnt); - - if (vec->attr_valid) { - nh = type_arr[0] ? (uint16_t)hopsv4[0] : (uint16_t)hopsv6[0]; - if (nh != FIB_DEFAULT_HOP) - vec->port = nh; - else - vec->attr_valid = 0; - } + rte_fib6_lookup_bulk( + lconf->ipv6_lookup_struct, + (uint8_t(*)[RTE_FIB6_IPV6_ADDR_SIZE])ipv6_arr, hopsv6, + ipv6_cnt); /* Assign ports looked up in fib depending on IPv4 or IPv6 */ for (i = 0; i < vec->nb_elem; i++) { @@ -481,9 +489,26 @@ fib_process_event_vector(struct rte_event_vector *vec) else nh = (uint16_t)hopsv6[ipv6_arr_assem++]; if (nh != FIB_DEFAULT_HOP) - mbufs[i]->port = nh; - event_vector_attr_validate(vec, mbufs[i]); + hops[i] = nh; + else + hops[i] = vec->attr_valid ? vec->port : + vec->mbufs[i]->port; } + +#if defined FIB_SEND_MULTI + uint16_t k; + k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != k; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &hops[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#else + for (i = 0; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &hops[i]); +#endif + + process_event_vector(vec, hops); } static __rte_always_inline void @@ -496,10 +521,37 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, const uint8_t event_d_id = evt_rsrc->event_d_id; const uint16_t deq_len = evt_rsrc->deq_depth; struct rte_event events[MAX_PKT_BURST]; + uint8_t *type_arr, **ipv6_arr, *ptr; int nb_enq = 0, nb_deq = 0, i; - - if (event_p_id < 0) + uint64_t *hopsv4, *hopsv6; + uint32_t *ipv4_arr; + uint16_t *hops; + uintptr_t mem; + + mem = (uintptr_t)rte_zmalloc( + "vector_fib", + (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint64_t) + + sizeof(uint64_t) + sizeof(uint16_t) + sizeof(uint8_t *) + + (sizeof(uint8_t) * RTE_FIB6_IPV6_ADDR_SIZE)) * + evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (mem == 0) return; + ipv4_arr = (uint32_t *)mem; + type_arr = (uint8_t *)&ipv4_arr[evt_rsrc->vector_size]; + hopsv4 = (uint64_t *)&type_arr[evt_rsrc->vector_size]; + hopsv6 = (uint64_t *)&hopsv4[evt_rsrc->vector_size]; + hops = (uint16_t *)&hopsv6[evt_rsrc->vector_size]; + ipv6_arr = (uint8_t **)&hops[evt_rsrc->vector_size]; + + ptr = (uint8_t *)&ipv6_arr[evt_rsrc->vector_size]; + for (i = 0; i < evt_rsrc->vector_size; i++) + ipv6_arr[i] = &ptr[RTE_FIB6_IPV6_ADDR_SIZE + i]; + + if (event_p_id < 0) { + rte_free((void *)mem); + return; + } RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, rte_lcore_id()); @@ -519,10 +571,9 @@ 
fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - fib_process_event_vector(events[i].vec); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + fib_process_event_vector(events[i].vec, type_arr, + ipv6_arr, hopsv4, hopsv6, + ipv4_arr, hops); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -546,6 +597,7 @@ fib_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free((void *)mem); } int __rte_noinline -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
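The root cause fixed here: the per-vector scratch arrays were stack locals sized MAX_PKT_BURST, while the event vector size is configurable and can be larger, overflowing the stack. The patch replaces them with one heap block sized by evt_rsrc->vector_size, carved into typed sub-arrays. A reduced model of the carving pattern (plain calloc instead of rte_zmalloc, field set trimmed; this sketch also orders the widest type first so every sub-array stays naturally aligned — a choice of the sketch, not something the patch states):

#include <stdint.h>
#include <stdlib.h>

struct fib_scratch {
	uint64_t *hopsv4;
	uint32_t *ipv4;
	uint16_t *hops;
	uint8_t *type;
	void *mem;
};

static int
fib_scratch_init(struct fib_scratch *s, uint16_t vector_size)
{
	/* One allocation sized by the runtime vector size replaces the
	 * old fixed-size stack arrays. */
	s->mem = calloc(vector_size, sizeof(uint64_t) + sizeof(uint32_t) +
				     sizeof(uint16_t) + sizeof(uint8_t));
	if (s->mem == NULL)
		return -1;
	s->hopsv4 = (uint64_t *)s->mem;
	s->ipv4 = (uint32_t *)&s->hopsv4[vector_size];
	s->hops = (uint16_t *)&s->ipv4[vector_size];
	s->type = (uint8_t *)&s->hops[vector_size];
	return 0;
}

int
main(void)
{
	struct fib_scratch s;

	if (fib_scratch_init(&s, 512) == 0)	/* > MAX_PKT_BURST is fine */
		free(s.mem);
	return 0;
}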
* [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula ` (2 preceding siblings ...) 2022-10-25 16:05 ` [PATCH v6 4/5] examples/l3fwd: fix event vector processing in fib pbhagavatula @ 2022-10-25 16:05 ` pbhagavatula 2022-10-31 14:52 ` [PATCH v6 1/5] examples/l3fwd: fix port group mask generation Thomas Monjalon 4 siblings, 0 replies; 41+ messages in thread From: pbhagavatula @ 2022-10-25 16:05 UTC (permalink / raw) To: jerinj, thomas; +Cc: dev, Pavan Nikhilesh, Shijith Thotton From: Pavan Nikhilesh <pbhagavatula@marvell.com> Use em vector path to process event vector. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Acked-by: Shijith Thotton <sthotton@marvell.com> --- examples/l3fwd/l3fwd_em.c | 13 +++-- examples/l3fwd/l3fwd_em.h | 29 +++++------ examples/l3fwd/l3fwd_em_hlm.h | 72 +++++----------------------- examples/l3fwd/l3fwd_em_sequential.h | 25 ++++++---- examples/l3fwd/l3fwd_event.h | 21 -------- 5 files changed, 48 insertions(+), 112 deletions(-) diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index a203dc9e46..35de31157e 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -860,10 +860,15 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; unsigned int lcore_id; + uint16_t *dst_ports; if (event_p_id < 0) return; + dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_ports == NULL) + return; lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; @@ -885,13 +890,12 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, } #if defined RTE_ARCH_X86 || defined __ARM_NEON - l3fwd_em_process_event_vector(events[i].vec, lconf); + l3fwd_em_process_event_vector(events[i].vec, lconf, + dst_ports); #else l3fwd_em_no_opt_process_event_vector(events[i].vec, - lconf); + lconf, dst_ports); #endif - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -915,6 +919,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_ports); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h index fe2ee59f6a..7d051fc076 100644 --- a/examples/l3fwd/l3fwd_em.h +++ b/examples/l3fwd/l3fwd_em.h @@ -100,7 +100,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid, } } -static __rte_always_inline void +static __rte_always_inline uint16_t l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) { struct rte_ether_hdr *eth_hdr; @@ -117,6 +117,8 @@ l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf) m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf); else m->port = BAD_PORT; + + return m->port; } /* @@ -179,7 +181,8 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events, static inline void l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, + uint16_t *dst_ports) { struct rte_mbuf **mbufs = vec->mbufs; int32_t i; @@ -188,30 +191,20 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec, for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++) rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *)); - /* Process first packet to init vector attributes */ - l3fwd_em_simple_process(mbufs[0], qconf); - if (vec->attr_valid) { - if (mbufs[0]->port != 
BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; - } - /* * Prefetch and forward already prefetched packets. */ - for (i = 1; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { + for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) { rte_prefetch0( rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *)); - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); } /* Forward remaining prefetched packets */ - for (; i < vec->nb_elem; i++) { - l3fwd_em_simple_process(mbufs[i], qconf); - event_vector_attr_validate(vec, mbufs[i]); - } + for (; i < vec->nb_elem; i++) + dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 12b997e477..2e11eefad7 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -332,70 +332,20 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev, static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_port) { - struct rte_mbuf **mbufs = vec->mbufs; - uint16_t dst_port[MAX_PKT_BURST]; - int32_t i, j, n, pos; - - for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < vec->nb_elem; j++) - rte_prefetch0( - rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); + uint16_t i; if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - - n = RTE_ALIGN_FLOOR(vec->nb_elem, EM_HASH_LOOKUP_COUNT); - for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) { - uint32_t pkt_type = - RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP; - uint32_t l3_type, tcp_or_udp; - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) - pkt_type &= mbufs[j + i]->packet_type; - - l3_type = pkt_type & RTE_PTYPE_L3_MASK; - tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP); - - for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; - i < EM_HASH_LOOKUP_COUNT && pos < vec->nb_elem; - i++, pos++) { - rte_prefetch0(rte_pktmbuf_mtod(mbufs[pos], - struct rte_ether_hdr *) + - 1); - } - - if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) { - em_get_dst_port_ipv4xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) { - em_get_dst_port_ipv6xN_events(qconf, &mbufs[j], - &dst_port[j]); - } else { - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = - em_get_dst_port(qconf, mbufs[j + i], - mbufs[j + i]->port); - process_packet(mbufs[j + i], - &mbufs[j + i]->port); - event_vector_attr_validate(vec, mbufs[j + i]); - } - continue; - } - processx4_step3(&mbufs[j], &dst_port[j]); - - for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) { - mbufs[j + i]->port = dst_port[j + i]; - event_vector_attr_validate(vec, mbufs[j + i]); - } - } - - for (; j < vec->nb_elem; j++) { - mbufs[j]->port = - em_get_dst_port(qconf, mbufs[j], mbufs[j]->port); - process_packet(mbufs[j], &mbufs[j]->port); - event_vector_attr_validate(vec, mbufs[j]); - } + l3fwd_em_process_packets(vec->nb_elem, vec->mbufs, dst_port, + vec->port, qconf, 1); + else + for (i = 0; i < vec->nb_elem; i++) + l3fwd_em_process_packets(1, &vec->mbufs[i], + &dst_port[i], + vec->mbufs[i]->port, qconf, 1); + + process_event_vector(vec, dst_port); } #endif /* __L3FWD_EM_HLM_H__ */ diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index d2f75edb8a..067f23889a 100644 --- 
a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -113,39 +113,48 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **events, for (i = 1, j = 0; j < nb_rx; i++, j++) { struct rte_mbuf *mbuf = events[j]->mbuf; + uint16_t port; if (i < nb_rx) { rte_prefetch0(rte_pktmbuf_mtod( events[i]->mbuf, struct rte_ether_hdr *) + 1); } + port = mbuf->port; mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); + if (mbuf->port == BAD_PORT) + mbuf->port = port; } } static inline void l3fwd_em_process_event_vector(struct rte_event_vector *vec, - struct lcore_conf *qconf) + struct lcore_conf *qconf, uint16_t *dst_ports) { + const uint8_t attr_valid = vec->attr_valid; struct rte_mbuf **mbufs = vec->mbufs; int32_t i, j; rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1); - if (vec->attr_valid) - vec->port = em_get_dst_port(qconf, mbufs[0], mbufs[0]->port); - for (i = 0, j = 1; i < vec->nb_elem; i++, j++) { if (j < vec->nb_elem) rte_prefetch0(rte_pktmbuf_mtod(mbufs[j], struct rte_ether_hdr *) + 1); - mbufs[i]->port = - em_get_dst_port(qconf, mbufs[i], mbufs[i]->port); - process_packet(mbufs[i], &mbufs[i]->port); - event_vector_attr_validate(vec, mbufs[i]); + dst_ports[i] = em_get_dst_port(qconf, mbufs[i], + attr_valid ? vec->port : + mbufs[i]->port); } + j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP); + + for (i = 0; i != j; i += FWDSTEP) + processx4_step3(&vec->mbufs[i], &dst_ports[i]); + for (; i < vec->nb_elem; i++) + process_packet(vec->mbufs[i], &dst_ports[i]); + + process_event_vector(vec, dst_ports); } #endif /* __L3FWD_EM_SEQUENTIAL_H__ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index 3fe38aada0..e21817c36b 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -103,27 +103,6 @@ process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) } #endif -static inline void -event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) -{ - /* l3fwd application only changes mbuf port while processing */ - if (vec->attr_valid && (vec->port != mbuf->port)) - vec->attr_valid = 0; -} - -static inline void -event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) -{ - if (vec->attr_valid) { - vec->queue = txq; - } else { - int i; - - for (i = 0; i < vec->nb_elem; i++) - rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], txq); - } -} - static inline uint16_t filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, uint16_t nb_pkts) -- 2.25.1 ^ permalink raw reply [flat|nested] 41+ messages in thread
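The dispatch in l3fwd_em_process_event_vector() above is the shape all three lookup modes now share: a valid vector attribute means every mbuf arrived on vec->port, so the whole vector can be classified as one burst; otherwise each mbuf is classified with its own ingress port, and process_event_vector() later decides whether the results still permit a single vector-level Tx attribute. A scalar model of that dispatch (names and the fake lookup are mine; sketch only):

#include <stdint.h>
#include <stdio.h>

/* Fake per-packet lookup: next hop derived from the ingress port. */
static uint16_t
lookup(uint16_t ingress)
{
	return (uint16_t)(ingress ^ 1);	/* pair ports 0<->1, 2<->3, ... */
}

/* attr_valid set: one shared ingress port, classify as one burst.
 * attr_valid clear: fall back to per-mbuf ingress ports. */
static void
classify_vector(int attr_valid, uint16_t vec_port,
		const uint16_t *mbuf_port, uint16_t *dst, int n)
{
	if (attr_valid) {
		for (int i = 0; i < n; i++)
			dst[i] = lookup(vec_port);
	} else {
		for (int i = 0; i < n; i++)
			dst[i] = lookup(mbuf_port[i]);
	}
}

int
main(void)
{
	const uint16_t in[4] = {0, 0, 2, 2};
	uint16_t out[4];

	classify_vector(0, 0, in, out, 4);
	printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); /* 1 1 3 3 */
	return 0;
}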
* Re: [PATCH v6 1/5] examples/l3fwd: fix port group mask generation 2022-10-25 16:05 ` [PATCH v6 " pbhagavatula ` (3 preceding siblings ...) 2022-10-25 16:05 ` [PATCH v6 5/5] examples/l3fwd: use em vector path for event vector pbhagavatula @ 2022-10-31 14:52 ` Thomas Monjalon 4 siblings, 0 replies; 41+ messages in thread From: Thomas Monjalon @ 2022-10-31 14:52 UTC (permalink / raw) To: Pavan Nikhilesh Cc: jerinj, David Christensen, stable, dev, stable, Shijith Thotton 25/10/2022 18:05, pbhagavatula@marvell.com: > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Fix port group mask generation in altivec, vec_any_eq returns > 0 or 1 while port_groupx4 expects comparison mask result. > > Fixes: 2193b7467f7a ("examples/l3fwd: optimize packet processing on powerpc") > Cc: stable@dpdk.org > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > Acked-by: Shijith Thotton <sthotton@marvell.com> Series applied, thanks. ^ permalink raw reply [flat|nested] 41+ messages in thread