From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1DF4FA329E for ; Thu, 24 Oct 2019 04:58:33 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 72A421C1EE; Thu, 24 Oct 2019 04:58:32 +0200 (CEST) Received: from foss.arm.com (unknown [217.140.110.172]) by dpdk.org (Postfix) with ESMTP id 892991C1E3; Thu, 24 Oct 2019 04:58:27 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 9AA4A31F; Wed, 23 Oct 2019 19:58:18 -0700 (PDT) Received: from net-arm-c2400-02.shanghai.arm.com (net-arm-c2400-02.shanghai.arm.com [10.169.40.42]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 37A143F71A; Wed, 23 Oct 2019 19:58:16 -0700 (PDT) From: Ruifeng Wang To: jerinj@marvell.com, gavin.hu@arm.com, wenzhuo.lu@intel.com, konstantin.ananyev@intel.com Cc: dev@dpdk.org, honnappa.nagarahalli@arm.com, nd@arm.com, Ruifeng Wang , stable@dpdk.org Date: Thu, 24 Oct 2019 10:58:02 +0800 Message-Id: <20191024025803.10571-2-ruifeng.wang@arm.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20191024025803.10571-1-ruifeng.wang@arm.com> References: <20191024025803.10571-1-ruifeng.wang@arm.com> Subject: [dpdk-stable] [PATCH 1/2] net/ixgbe: add ptype parse for aarch64 vector PMD X-BeenThere: stable@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches for DPDK stable branches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: stable-bounces@dpdk.org Sender: "stable" Ptype parse is missing in aarch64 vector PMD. It makes packet type info provided by NIC get lost, thus requires extra CPU cycles to do this. Add the parse process to utilize NIC hardware capability. In test with l3fwd (removed port conf DEV_RX_OFFLOAD_CHECKSUM), observed over 3% performance gain. Fixes: b20971b6cca0 ("net/ixgbe: implement vector driver for ARM") Cc: stable@dpdk.org Signed-off-by: Ruifeng Wang Reviewed-by: Gavin Hu --- drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 64 +++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index dbce7a80e..3c7624f3a 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -3908,7 +3908,7 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc) return ptypes; -#if defined(RTE_ARCH_X86) +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) if (dev->rx_pkt_burst == ixgbe_recv_pkts_vec || dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec) return ptypes; diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c index eeb825911..36e14c66f 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c @@ -146,6 +146,68 @@ desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2, #define IXGBE_VPMD_DESC_EOP_MASK 0x02020202 #define IXGBE_UINT8_BIT (CHAR_BIT * sizeof(uint8_t)) +static inline uint32_t +get_packet_type(uint32_t pkt_info, + uint32_t etqf_check, + uint32_t tunnel_check) +{ + if (etqf_check) + return RTE_PTYPE_UNKNOWN; + + if (tunnel_check) { + pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL; + return ptype_table_tn[pkt_info]; + } + + pkt_info &= IXGBE_PACKET_TYPE_MASK_82599; + return ptype_table[pkt_info]; +} + +static inline void +desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask, + struct rte_mbuf **rx_pkts) +{ + uint32x4_t etqf_check, tunnel_check; + uint32x4_t etqf_mask = vdupq_n_u32(0x8000); + uint32x4_t tunnel_mask = vdupq_n_u32(0x10000); + uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask); + uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]), + vreinterpretq_u32_u64(descs[2])).val[0]; + uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]), + vreinterpretq_u32_u64(descs[3])).val[0]; + + /* interleave low 32 bits, + * now we have 4 ptypes in a NEON register + */ + ptype0 = vzipq_u32(ptype0, ptype1).val[0]; + + /* mask etqf bits */ + etqf_check = vandq_u32(ptype0, etqf_mask); + /* mask tunnel bits */ + tunnel_check = vandq_u32(ptype0, tunnel_mask); + + /* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */ + ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT), + ptype_mask); + + rx_pkts[0]->packet_type = + get_packet_type(vgetq_lane_u32(ptype0, 0), + vgetq_lane_u32(etqf_check, 0), + vgetq_lane_u32(tunnel_check, 0)); + rx_pkts[1]->packet_type = + get_packet_type(vgetq_lane_u32(ptype0, 1), + vgetq_lane_u32(etqf_check, 1), + vgetq_lane_u32(tunnel_check, 1)); + rx_pkts[2]->packet_type = + get_packet_type(vgetq_lane_u32(ptype0, 2), + vgetq_lane_u32(etqf_check, 2), + vgetq_lane_u32(tunnel_check, 2)); + rx_pkts[3]->packet_type = + get_packet_type(vgetq_lane_u32(ptype0, 3), + vgetq_lane_u32(etqf_check, 3), + vgetq_lane_u32(tunnel_check, 3)); +} + static inline uint16_t _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) @@ -303,6 +365,8 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); + desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]); + /* C.5 calc available number of desc */ if (unlikely(stat == 0)) { nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP; -- 2.17.1