From: Rahul Bhansali <rbhansali@marvell.com>
To: Rahul Bhansali <rbhansali@marvell.com>,
"dev@dpdk.org" <dev@dpdk.org>,
Radu Nicolau <radu.nicolau@intel.com>,
Akhil Goyal <gakhil@marvell.com>,
Ruifeng Wang <ruifeng.wang@arm.com>
Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>,
Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
Subject: RE: [PATCH v2 2/2] examples/ipsec-secgw: add support of NEON with poll mode
Date: Fri, 17 Jun 2022 07:51:17 +0000
Message-ID: <PH0PR18MB3846CFC980E4A0DE964F4696B8AF9@PH0PR18MB3846.namprd18.prod.outlook.com>
In-Reply-To: <20220617074241.3260496-2-rbhansali@marvell.com>
CC: Konstantin Ananyev
> -----Original Message-----
> From: Rahul Bhansali <rbhansali@marvell.com>
> Sent: Friday, June 17, 2022 1:13 PM
> To: dev@dpdk.org; Radu Nicolau <radu.nicolau@intel.com>; Akhil Goyal
> <gakhil@marvell.com>; Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Rahul Bhansali
> <rbhansali@marvell.com>
> Subject: [PATCH v2 2/2] examples/ipsec-secgw: add support of NEON with poll
> mode
>
> This adds support for NEON-based LPM lookup along with multi-packet
> processing for burst send in packet routing.
>
> Performance impact:
> On cn10k, with poll mode inline protocol, outbound performance increased
> by up to ~8% and inbound performance increased by up to ~6%.
>
> Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
> ---
> Changes in v2: Removed the NEON packet grouping function and used the
> common one.
>
> examples/ipsec-secgw/Makefile | 5 +-
> examples/ipsec-secgw/ipsec-secgw.c | 25 ++
> examples/ipsec-secgw/ipsec_lpm_neon.h | 213 +++++++++++++++++
> examples/ipsec-secgw/ipsec_neon.h | 321 ++++++++++++++++++++++++++
> examples/ipsec-secgw/ipsec_worker.c | 9 +
> 5 files changed, 571 insertions(+), 2 deletions(-)
> create mode 100644 examples/ipsec-secgw/ipsec_lpm_neon.h
> create mode 100644 examples/ipsec-secgw/ipsec_neon.h
>
> diff --git a/examples/ipsec-secgw/Makefile b/examples/ipsec-secgw/Makefile
> index 89af54bd37..ffe232774d 100644
> --- a/examples/ipsec-secgw/Makefile
> +++ b/examples/ipsec-secgw/Makefile
> @@ -36,6 +36,7 @@ shared: build/$(APP)-shared
> static: build/$(APP)-static
> ln -sf $(APP)-static build/$(APP)
>
> +INCLUDES =-I../common
> PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null)
> CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk)
> LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
> @@ -53,10 +54,10 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
> CFLAGS += -Wno-address-of-packed-member
>
> build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
> +	$(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
>
> build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
> +	$(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
>
> build:
> @mkdir -p $@
> diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c
> index 4d8a4a71b8..b650668305 100644
> --- a/examples/ipsec-secgw/ipsec-secgw.c
> +++ b/examples/ipsec-secgw/ipsec-secgw.c
> @@ -56,6 +56,10 @@
> #include "parser.h"
> #include "sad.h"
>
> +#if defined(__ARM_NEON)
> +#include "ipsec_lpm_neon.h"
> +#endif
> +
> volatile bool force_quit;
>
> #define MAX_JUMBO_PKT_LEN 9600
> @@ -100,6 +104,12 @@ struct ethaddr_info ethaddr_tbl[RTE_MAX_ETHPORTS] = {
> 	{ 0, ETHADDR(0x00, 0x16, 0x3e, 0x49, 0x9e, 0xdd) }
> };
>
> +/*
> + * To hold ethernet header per port, which will be applied
> + * to outgoing packets.
> + */
> +xmm_t val_eth[RTE_MAX_ETHPORTS];
> +
> struct flow_info flow_info_tbl[RTE_MAX_ETHPORTS];
>
> #define CMD_LINE_OPT_CONFIG "config"
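For reference: val_eth[] packs a ready-made L2 header per port (bytes 0-5
destination MAC, bytes 6-11 source MAC, last 4 bytes patched per packet),
so the NEON TX path can rewrite both MAC addresses with one 16-byte vector
store. A hypothetical scalar helper, illustrating the layout only; the
patch itself fills the table with rte_ether_addr_copy() in the hunks below:

    static inline void
    set_port_l2_hdr(uint16_t port, const struct rte_ether_addr *dst,
                    const struct rte_ether_addr *src)
    {
            uint8_t hdr[16] = {0};  /* last 4 bytes rewritten per packet */

            memcpy(hdr, dst->addr_bytes, RTE_ETHER_ADDR_LEN);     /* 0-5  */
            memcpy(hdr + RTE_ETHER_ADDR_LEN, src->addr_bytes,
                   RTE_ETHER_ADDR_LEN);                           /* 6-11 */
            memcpy(&val_eth[port], hdr, sizeof(hdr));
    }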
> @@ -568,9 +578,16 @@ process_pkts(struct lcore_conf *qconf, struct rte_mbuf **pkts,
> process_pkts_outbound(&qconf->outbound, &traffic);
> }
>
> +#if defined __ARM_NEON
> + /* Neon optimized packet routing */
> + route4_pkts_neon(qconf->rt4_ctx, traffic.ip4.pkts, traffic.ip4.num,
> + qconf->outbound.ipv4_offloads, true);
> + route6_pkts_neon(qconf->rt6_ctx, traffic.ip6.pkts, traffic.ip6.num);
> +#else
> route4_pkts(qconf->rt4_ctx, traffic.ip4.pkts, traffic.ip4.num,
> qconf->outbound.ipv4_offloads, true);
> route6_pkts(qconf->rt6_ctx, traffic.ip6.pkts, traffic.ip6.num);
> +#endif
> }
>
> static inline void
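Note that the NEON path is selected purely at compile time via __ARM_NEON.
A hedged sketch (hypothetical helper, not part of the patch; parameter
types assumed) of hiding the choice behind one wrapper so call sites stay
identical:

    static inline void
    route4_pkts_any(struct rt_ctx *ctx, struct rte_mbuf **pkts, int num,
                    uint64_t tx_offloads, bool ip_cksum)
    {
    #if defined(__ARM_NEON)
            route4_pkts_neon(ctx, pkts, num, tx_offloads, ip_cksum);
    #else
            route4_pkts(ctx, pkts, num, tx_offloads, ip_cksum);
    #endif
    }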
> @@ -1403,6 +1420,8 @@ add_dst_ethaddr(uint16_t port, const struct rte_ether_addr *addr)
> return -EINVAL;
>
> ethaddr_tbl[port].dst = ETHADDR_TO_UINT64(addr);
> +	rte_ether_addr_copy((struct rte_ether_addr *)&ethaddr_tbl[port].dst,
> + (struct rte_ether_addr *)(val_eth + port));
> return 0;
> }
>
> @@ -1865,6 +1884,12 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads)
> portid, rte_strerror(-ret));
>
> 	ethaddr_tbl[portid].src = ETHADDR_TO_UINT64(&ethaddr);
> +
> +	rte_ether_addr_copy((struct rte_ether_addr *)&ethaddr_tbl[portid].dst,
> +			(struct rte_ether_addr *)(val_eth + portid));
> +	rte_ether_addr_copy((struct rte_ether_addr *)&ethaddr_tbl[portid].src,
> +			(struct rte_ether_addr *)(val_eth + portid) + 1);
> +
> 	print_ethaddr("Address: ", &ethaddr);
> printf("\n");
>
> diff --git a/examples/ipsec-secgw/ipsec_lpm_neon.h b/examples/ipsec-secgw/ipsec_lpm_neon.h
> new file mode 100644
> index 0000000000..959a5a8666
> --- /dev/null
> +++ b/examples/ipsec-secgw/ipsec_lpm_neon.h
> @@ -0,0 +1,213 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2022 Marvell.
> + */
> +
> +#ifndef __IPSEC_LPM_NEON_H__
> +#define __IPSEC_LPM_NEON_H__
> +
> +#include <arm_neon.h>
> +#include "ipsec_neon.h"
> +
> +/*
> + * Prepend Ethernet header and read destination IPv4 addresses from 4 mbufs.
> + */
> +static inline void
> +processx4_step1(struct rte_mbuf *pkt[FWDSTEP], int32x4_t *dip,
> + uint64_t *inline_flag)
> +{
> + struct rte_ipv4_hdr *ipv4_hdr;
> + struct rte_ether_hdr *eth_hdr;
> + int32_t dst[FWDSTEP];
> + int i;
> +
> + for (i = 0; i < FWDSTEP; i++) {
> +		eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_prepend(pkt[i],
> +				RTE_ETHER_HDR_LEN);
> + pkt[i]->ol_flags |= RTE_MBUF_F_TX_IPV4;
> + pkt[i]->l2_len = RTE_ETHER_HDR_LEN;
> +
> + ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
> +
> + /* Fetch destination IPv4 address */
> + dst[i] = ipv4_hdr->dst_addr;
> +		*inline_flag |= pkt[i]->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD;
> + }
> +
> + dip[0] = vld1q_s32(dst);
> +}
> +
> +/*
> + * Lookup into LPM for destination port.
> + */
> +static inline void
> +processx4_step2(struct rt_ctx *rt_ctx, int32x4_t dip, uint64_t inline_flag,
> +		struct rte_mbuf *pkt[FWDSTEP], uint16_t dprt[FWDSTEP])
> +{
> + uint32_t next_hop;
> + rte_xmm_t dst;
> + uint8_t i;
> +
> + dip = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(dip)));
> +
> + /* If all 4 packets are non-inline */
> + if (!inline_flag) {
> + rte_lpm_lookupx4((struct rte_lpm *)rt_ctx, dip, dst.u32,
> + BAD_PORT);
> + /* get rid of unused upper 16 bit for each dport. */
> + vst1_s16((int16_t *)dprt, vqmovn_s32(dst.x));
> + return;
> + }
> +
> + /* Inline and non-inline packets */
> + dst.x = dip;
> + for (i = 0; i < FWDSTEP; i++) {
> + if (pkt[i]->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
> + next_hop = get_hop_for_offload_pkt(pkt[i], 0);
> +			dprt[i] = (uint16_t) (((next_hop &
> +					RTE_LPM_LOOKUP_SUCCESS) != 0)
> +					? next_hop : BAD_PORT);
> +
> + } else {
> + dprt[i] = (uint16_t) ((rte_lpm_lookup(
> + (struct rte_lpm *)rt_ctx,
> + dst.u32[i], &next_hop) == 0)
> + ? next_hop : BAD_PORT);
> + }
> + }
> +}
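For reference, the vrev32q_u8() above byte-swaps each 32-bit lane: the wire
carries addresses big-endian while LPM wants host order, so on little-endian
Arm it is the vector form of a per-packet rte_be_to_cpu_32(). A scalar
equivalent of one lookup, illustrative only and assuming the same
rt_ctx/BAD_PORT semantics as above:

    static inline uint16_t
    lookup_one(struct rt_ctx *rt_ctx, rte_be32_t dst_be)
    {
            uint32_t next_hop;

            if (rte_lpm_lookup((struct rte_lpm *)rt_ctx,
                               rte_be_to_cpu_32(dst_be), &next_hop) == 0)
                    return (uint16_t)next_hop;
            return BAD_PORT;        /* lookup miss */
    }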
> +
> +/*
> + * Process a single packet for destination port.
> + */
> +static inline void
> +process_single_pkt(struct rt_ctx *rt_ctx, struct rte_mbuf *pkt,
> + uint16_t *dst_port)
> +{
> + struct rte_ether_hdr *eth_hdr;
> + struct rte_ipv4_hdr *ipv4_hdr;
> + uint32_t next_hop;
> + uint32_t dst_ip;
> +
> +	eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_prepend(pkt,
> +			RTE_ETHER_HDR_LEN);
> + pkt->ol_flags |= RTE_MBUF_F_TX_IPV4;
> + pkt->l2_len = RTE_ETHER_HDR_LEN;
> +
> + if (pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
> + next_hop = get_hop_for_offload_pkt(pkt, 0);
> +		*dst_port = (uint16_t) (((next_hop &
> +				RTE_LPM_LOOKUP_SUCCESS) != 0)
> +				? next_hop : BAD_PORT);
> + } else {
> + ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
> + dst_ip = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
> + *dst_port = (uint16_t) ((rte_lpm_lookup(
> + (struct rte_lpm *)rt_ctx,
> + dst_ip, &next_hop) == 0)
> + ? next_hop : BAD_PORT);
> + }
> +}
> +
> +/*
> + * Buffer optimized handling of IPv6 packets.
> + */
> +static inline void
> +route6_pkts_neon(struct rt_ctx *rt_ctx, struct rte_mbuf **pkts, int nb_rx)
> +{
> + uint8_t dst_ip6[MAX_PKT_BURST][16];
> + int32_t dst_port[MAX_PKT_BURST];
> + struct rte_ether_hdr *eth_hdr;
> + struct rte_ipv6_hdr *ipv6_hdr;
> + int32_t hop[MAX_PKT_BURST];
> + struct rte_mbuf *pkt;
> + uint8_t lpm_pkts = 0;
> + int32_t i;
> +
> + if (nb_rx == 0)
> + return;
> +
> + /* Need to do an LPM lookup for non-inline packets. Inline packets will
> + * have port ID in the SA
> + */
> +
> + for (i = 0; i < nb_rx; i++) {
> + pkt = pkts[i];
> +		eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_prepend(pkt,
> +				RTE_ETHER_HDR_LEN);
> + pkt->l2_len = RTE_ETHER_HDR_LEN;
> + pkt->ol_flags |= RTE_MBUF_F_TX_IPV6;
> +
> + if (!(pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)) {
> + /* Security offload not enabled. So an LPM lookup is
> + * required to get the hop
> + */
> + ipv6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1);
> + memcpy(&dst_ip6[lpm_pkts][0],
> + ipv6_hdr->dst_addr, 16);
> + lpm_pkts++;
> + }
> + }
> +
> + rte_lpm6_lookup_bulk_func((struct rte_lpm6 *)rt_ctx, dst_ip6,
> + hop, lpm_pkts);
> +
> + lpm_pkts = 0;
> +
> + for (i = 0; i < nb_rx; i++) {
> + pkt = pkts[i];
> + if (pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
> + /* Read hop from the SA */
> + dst_port[i] = get_hop_for_offload_pkt(pkt, 1);
> + } else {
> + /* Need to use hop returned by lookup */
> + dst_port[i] = hop[lpm_pkts++];
> + }
> + if (dst_port[i] == -1)
> + dst_port[i] = BAD_PORT;
> + }
> +
> + /* Send packets */
> +	send_multi_pkts(pkts, (uint16_t *)dst_port, nb_rx, 0, 0, false);
> +}
> +
> +/*
> + * Buffer optimized handling of IPv4 packets.
> + */
> +static inline void
> +route4_pkts_neon(struct rt_ctx *rt_ctx, struct rte_mbuf **pkts, int nb_rx,
> +		uint64_t tx_offloads, bool ip_cksum)
> +{
> + const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
> + const int32_t m = nb_rx % FWDSTEP;
> + uint16_t dst_port[MAX_PKT_BURST];
> + uint64_t inline_flag = 0;
> + int32x4_t dip;
> + int32_t i;
> +
> + if (nb_rx == 0)
> + return;
> +
> + for (i = 0; i != k; i += FWDSTEP) {
> + processx4_step1(&pkts[i], &dip, &inline_flag);
> + processx4_step2(rt_ctx, dip, inline_flag, &pkts[i],
> + &dst_port[i]);
> + }
> +
> + /* Classify last up to 3 packets one by one */
> + switch (m) {
> + case 3:
> + process_single_pkt(rt_ctx, pkts[i], &dst_port[i]);
> + i++;
> + /* fallthrough */
> + case 2:
> + process_single_pkt(rt_ctx, pkts[i], &dst_port[i]);
> + i++;
> + /* fallthrough */
> + case 1:
> + process_single_pkt(rt_ctx, pkts[i], &dst_port[i]);
> + }
> +
> +	send_multi_pkts(pkts, dst_port, nb_rx, tx_offloads, ip_cksum, true);
> +}
> +
> +#endif /* __IPSEC_LPM_NEON_H__ */
> diff --git a/examples/ipsec-secgw/ipsec_neon.h b/examples/ipsec-secgw/ipsec_neon.h
> new file mode 100644
> index 0000000000..0f72219ed0
> --- /dev/null
> +++ b/examples/ipsec-secgw/ipsec_neon.h
> @@ -0,0 +1,321 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2022 Marvell.
> + */
> +
> +#ifndef _IPSEC_NEON_H_
> +#define _IPSEC_NEON_H_
> +
> +#include "ipsec.h"
> +#include "neon_common.h"
> +
> +#define MAX_TX_BURST (MAX_PKT_BURST / 2)
> +#define BAD_PORT ((uint16_t)-1)
> +
> +extern xmm_t val_eth[RTE_MAX_ETHPORTS];
> +
> +/*
> + * Update source and destination MAC addresses in the ethernet header.
> + */
> +static inline void
> +processx4_step3(struct rte_mbuf *pkts[FWDSTEP], uint16_t dst_port[FWDSTEP],
> +		uint64_t tx_offloads, bool ip_cksum, uint8_t *l_pkt)
> +{
> + uint32x4_t te[FWDSTEP];
> + uint32x4_t ve[FWDSTEP];
> + uint32_t *p[FWDSTEP];
> + struct rte_mbuf *pkt;
> + uint8_t i;
> +
> + for (i = 0; i < FWDSTEP; i++) {
> + pkt = pkts[i];
> +
> + /* Check if it is a large packet */
> + if (pkt->pkt_len - RTE_ETHER_HDR_LEN > mtu_size)
> + *l_pkt |= 1;
> +
> + p[i] = rte_pktmbuf_mtod(pkt, uint32_t *);
> + ve[i] = vreinterpretq_u32_s32(val_eth[dst_port[i]]);
> + te[i] = vld1q_u32(p[i]);
> +
> + /* Update last 4 bytes */
> + ve[i] = vsetq_lane_u32(vgetq_lane_u32(te[i], 3), ve[i], 3);
> + vst1q_u32(p[i], ve[i]);
> +
> + if (ip_cksum) {
> + struct rte_ipv4_hdr *ip;
> +
> + pkt->ol_flags |= tx_offloads;
> +
> +			ip = (struct rte_ipv4_hdr *)
> +				(p[i] + RTE_ETHER_HDR_LEN + 1);
> + ip->hdr_checksum = 0;
> +
> + /* calculate IPv4 cksum in SW */
> + if ((pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) == 0)
> + ip->hdr_checksum = rte_ipv4_cksum(ip);
> + }
> +
> + }
> +}
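The vsetq_lane_u32()/vgetq_lane_u32() pair above exists because the 16-byte
store spans the 14-byte Ethernet header plus the first 2 payload bytes:
lane 3 (bytes 12-15, i.e. the EtherType and those 2 bytes) is carried over
from the original frame so it is rewritten with its own values. A scalar
view of the same store, illustrative only:

    struct rte_ether_hdr *eth = rte_pktmbuf_mtod(pkts[i],
                                                 struct rte_ether_hdr *);
    uint8_t hdr[16];

    memcpy(hdr, &val_eth[dst_port[i]], 12);    /* new dst + src MAC    */
    memcpy(hdr + 12, (uint8_t *)eth + 12, 4);  /* keep EtherType + 2 B */
    memcpy(eth, hdr, 16);                      /* one 16-byte store    */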
> +
> +/**
> + * Process single packet:
> + * Update source and destination MAC addresses in the ethernet header.
> + */
> +static inline void
> +process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint64_t tx_offloads,
> + bool ip_cksum, uint8_t *l_pkt)
> +{
> + struct rte_ether_hdr *eth_hdr;
> + uint32x4_t te, ve;
> +
> + /* Check if it is a large packet */
> + if (pkt->pkt_len - RTE_ETHER_HDR_LEN > mtu_size)
> + *l_pkt |= 1;
> +
> + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
> +
> + te = vld1q_u32((uint32_t *)eth_hdr);
> + ve = vreinterpretq_u32_s32(val_eth[dst_port[0]]);
> +
> + ve = vcopyq_laneq_u32(ve, 3, te, 3);
> + vst1q_u32((uint32_t *)eth_hdr, ve);
> +
> + if (ip_cksum) {
> + struct rte_ipv4_hdr *ip;
> +
> + pkt->ol_flags |= tx_offloads;
> +
> + ip = (struct rte_ipv4_hdr *)(eth_hdr + 1);
> + ip->hdr_checksum = 0;
> +
> + /* calculate IPv4 cksum in SW */
> + if ((pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) == 0)
> + ip->hdr_checksum = rte_ipv4_cksum(ip);
> + }
> +}
> +
> +static inline void
> +send_packets(struct rte_mbuf *m[], uint16_t port, uint32_t num, bool is_ipv4)
> +{
> + uint8_t proto;
> + uint32_t i;
> +
> + proto = is_ipv4 ? IPPROTO_IP : IPPROTO_IPV6;
> + for (i = 0; i < num; i++)
> +		send_single_packet(m[i], port, proto);
> +}
> +
> +static inline void
> +send_packetsx4(struct rte_mbuf *m[], uint16_t port, uint32_t num)
> +{
> + unsigned int lcoreid = rte_lcore_id();
> + struct lcore_conf *qconf;
> + uint32_t len, j, n;
> +
> + qconf = &lcore_conf[lcoreid];
> +
> + len = qconf->tx_mbufs[port].len;
> +
> + /*
> + * If TX buffer for that queue is empty, and we have enough packets,
> + * then send them straightway.
> + */
> + if (num >= MAX_TX_BURST && len == 0) {
> + n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
> + core_stats_update_tx(n);
> + if (unlikely(n < num)) {
> + do {
> + rte_pktmbuf_free(m[n]);
> + } while (++n < num);
> + }
> + return;
> + }
> +
> + /*
> + * Put packets into TX buffer for that queue.
> + */
> +
> + n = len + num;
> + n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
> +
> + j = 0;
> + switch (n % FWDSTEP) {
> + while (j < n) {
> + case 0:
> + qconf->tx_mbufs[port].m_table[len + j] = m[j];
> + j++;
> + /* fallthrough */
> + case 3:
> + qconf->tx_mbufs[port].m_table[len + j] = m[j];
> + j++;
> + /* fallthrough */
> + case 2:
> + qconf->tx_mbufs[port].m_table[len + j] = m[j];
> + j++;
> + /* fallthrough */
> + case 1:
> + qconf->tx_mbufs[port].m_table[len + j] = m[j];
> + j++;
> + }
> + }
> +
> + len += n;
> +
> + /* enough pkts to be sent */
> + if (unlikely(len == MAX_PKT_BURST)) {
> +
> + send_burst(qconf, MAX_PKT_BURST, port);
> +
> + /* copy rest of the packets into the TX buffer. */
> + len = num - n;
> + if (len == 0)
> + goto exit;
> +
> + j = 0;
> + switch (len % FWDSTEP) {
> + while (j < len) {
> + case 0:
> + qconf->tx_mbufs[port].m_table[j] = m[n + j];
> + j++;
> + /* fallthrough */
> + case 3:
> + qconf->tx_mbufs[port].m_table[j] = m[n + j];
> + j++;
> + /* fallthrough */
> + case 2:
> + qconf->tx_mbufs[port].m_table[j] = m[n + j];
> + j++;
> + /* fallthrough */
> + case 1:
> + qconf->tx_mbufs[port].m_table[j] = m[n + j];
> + j++;
> + }
> + }
> + }
> +
> +exit:
> + qconf->tx_mbufs[port].len = len;
> +}
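The switch-inside-while constructs above are Duff's-device unrolls: entry
jumps to case n % FWDSTEP, after which every pass around the loop copies
FWDSTEP mbufs. Each is behaviourally identical to the plain loop below,
just unrolled to cut loop overhead (reference only):

    for (j = 0; j < n; j++)
            qconf->tx_mbufs[port].m_table[len + j] = m[j];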
> +
> +/**
> + * Send burst of packets to the ports in dst_port array.
> + */
> +static __rte_always_inline void
> +send_multi_pkts(struct rte_mbuf **pkts, uint16_t dst_port[MAX_PKT_BURST],
> +		int nb_rx, uint64_t tx_offloads, bool ip_cksum, bool is_ipv4)
> +{
> + unsigned int lcoreid = rte_lcore_id();
> + uint16_t pnum[MAX_PKT_BURST + 1];
> + uint8_t l_pkt = 0;
> + uint16_t dlp, *lp;
> + int i = 0, k;
> +
> + /*
> + * Finish packet processing and group consecutive
> + * packets with the same destination port.
> + */
> + k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
> +
> + if (k != 0) {
> + uint16x8_t dp1, dp2;
> +
> + lp = pnum;
> + lp[0] = 1;
> +
> + processx4_step3(pkts, dst_port, tx_offloads, ip_cksum, &l_pkt);
> +
> + /* dp1: <d[0], d[1], d[2], d[3], ... > */
> + dp1 = vld1q_u16(dst_port);
> +
> + for (i = FWDSTEP; i != k; i += FWDSTEP) {
> + processx4_step3(&pkts[i], &dst_port[i], tx_offloads,
> + ip_cksum, &l_pkt);
> +
> + /*
> + * dp2:
> + * <d[j-3], d[j-2], d[j-1], d[j], ... >
> + */
> + dp2 = vld1q_u16(&dst_port[i - FWDSTEP + 1]);
> +			lp = neon_port_groupx4(&pnum[i - FWDSTEP], lp, dp1, dp2);
> +
> + /*
> + * dp1:
> + * <d[j], d[j+1], d[j+2], d[j+3], ... >
> + */
> + dp1 = vextq_u16(dp2, dp1, FWDSTEP - 1);
> + }
> +
> + /*
> + * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
> + */
> + dp2 = vextq_u16(dp1, dp1, 1);
> + dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3);
> + lp = neon_port_groupx4(&pnum[i - FWDSTEP], lp, dp1, dp2);
> +
> + /*
> + * remove values added by the last repeated
> + * dst port.
> + */
> + lp[0]--;
> + dlp = dst_port[i - 1];
> + } else {
> + /* set dlp and lp to the never used values. */
> + dlp = BAD_PORT - 1;
> + lp = pnum + MAX_PKT_BURST;
> + }
> +
> + /* Process up to last 3 packets one by one. */
> + switch (nb_rx % FWDSTEP) {
> + case 3:
> + process_packet(pkts[i], dst_port + i, tx_offloads, ip_cksum,
> + &l_pkt);
> + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, i);
> + i++;
> + /* fallthrough */
> + case 2:
> + process_packet(pkts[i], dst_port + i, tx_offloads, ip_cksum,
> + &l_pkt);
> + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, i);
> + i++;
> + /* fallthrough */
> + case 1:
> + process_packet(pkts[i], dst_port + i, tx_offloads, ip_cksum,
> + &l_pkt);
> + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, i);
> + }
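At this point neon_port_groupx4() and GROUP_PORT_STEP have produced
run-length groups: for each group start i, pnum[i] holds the count of
consecutive packets sharing dst_port[i], which is what lets the send loop
below advance with i += k. A scalar sketch producing the same pnum[]
contents, illustrative only:

    int i = 0, j;

    while (i < nb_rx) {
            j = i + 1;
            while (j < nb_rx && dst_port[j] == dst_port[i])
                    j++;
            pnum[i] = (uint16_t)(j - i);    /* run length of this group */
            i = j;
    }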
> +
> + /*
> + * Send packets out, through destination port.
> + * Consecutive packets with the same destination port
> + * are already grouped together.
> + * If destination port for the packet equals BAD_PORT,
> + * then free the packet without sending it out.
> + */
> + for (i = 0; i < nb_rx; i += k) {
> +
> + uint16_t pn;
> +
> + pn = dst_port[i];
> + k = pnum[i];
> +
> + if (likely(pn != BAD_PORT)) {
> + if (l_pkt)
> +				/* Large packet present; send packets
> +				 * individually so they can be fragmented.
> +				 */
> + send_packets(pkts + i, pn, k, is_ipv4);
> + else
> + send_packetsx4(pkts + i, pn, k);
> +
> + } else {
> + free_pkts(&pkts[i], k);
> + if (is_ipv4)
> + core_statistics[lcoreid].lpm4.miss++;
> + else
> + core_statistics[lcoreid].lpm6.miss++;
> + }
> + }
> +}
> +
> +#endif /* _IPSEC_NEON_H_ */
> diff --git a/examples/ipsec-secgw/ipsec_worker.c b/examples/ipsec-secgw/ipsec_worker.c
> index e1d4e3d864..803157d8ee 100644
> --- a/examples/ipsec-secgw/ipsec_worker.c
> +++ b/examples/ipsec-secgw/ipsec_worker.c
> @@ -12,6 +12,10 @@
> #include "ipsec-secgw.h"
> #include "ipsec_worker.h"
>
> +#if defined(__ARM_NEON)
> +#include "ipsec_lpm_neon.h"
> +#endif
> +
> struct port_drv_mode_data {
> struct rte_security_session *sess;
> struct rte_security_ctx *ctx;
> @@ -1248,8 +1252,13 @@ ipsec_poll_mode_wrkr_inl_pr(void)
> v6_num = ip6.num;
> }
>
> +#if defined __ARM_NEON
> +		route4_pkts_neon(rt4_ctx, v4, v4_num, 0, false);
> +		route6_pkts_neon(rt6_ctx, v6, v6_num);
> +#else
> route4_pkts(rt4_ctx, v4, v4_num, 0, false);
> route6_pkts(rt6_ctx, v6, v6_num);
> +#endif
> }
> }
> }
> --
> 2.25.1
Thread overview: 26+ messages
2022-05-24 9:57 [PATCH] " Rahul Bhansali
2022-05-24 23:00 ` Konstantin Ananyev
2022-05-25 11:03 ` [EXT] " Rahul Bhansali
2022-05-27 11:44 ` Konstantin Ananyev
2022-06-17 7:42 ` [PATCH v2 1/2] examples/l3fwd: common packet group functionality Rahul Bhansali
2022-06-17 7:42 ` [PATCH v2 2/2] examples/ipsec-secgw: add support of NEON with poll mode Rahul Bhansali
2022-06-17 7:51 ` Rahul Bhansali [this message]
2022-06-21 12:55 ` Akhil Goyal
2022-06-23 8:46 ` Zhang, Roy Fan
2022-06-23 9:37 ` Rahul Bhansali
2022-06-17 7:50 ` [PATCH v2 1/2] examples/l3fwd: common packet group functionality Rahul Bhansali
2022-06-20 23:13 ` Konstantin Ananyev
2022-06-21 16:50 ` [EXT] " Rahul Bhansali
2022-06-22 23:25 ` Konstantin Ananyev
2022-06-20 7:49 ` [EXT] " Akhil Goyal
2022-06-20 10:45 ` Thomas Monjalon
2022-06-21 12:56 ` Akhil Goyal
2022-06-23 9:38 ` [PATCH v3 " Rahul Bhansali
2022-06-23 9:38 ` [PATCH v3 2/2] examples/ipsec-secgw: add support of NEON with poll mode Rahul Bhansali
2022-06-26 19:00 ` [PATCH v3 1/2] examples/l3fwd: common packet group functionality Konstantin Ananyev
2022-06-28 8:54 ` [EXT] " Akhil Goyal
2022-07-03 21:40 ` Thomas Monjalon
2022-07-04 12:49 ` [EXT] " Rahul Bhansali
2022-07-04 14:04 ` Thomas Monjalon
2022-07-04 14:48 ` Thomas Monjalon
2022-07-05 16:11 ` [EXT] " Rahul Bhansali