From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 042FD1B585 for ; Fri, 22 Mar 2019 11:12:10 +0100 (CET) Received: from smtp.corp.redhat.com (int-mx08.intmail.prod.int.phx2.redhat.com [10.5.11.23]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 5B1BA2026F; Fri, 22 Mar 2019 10:12:10 +0000 (UTC) Received: from [10.36.112.59] (ovpn-112-59.ams2.redhat.com [10.36.112.59]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 51F9B19C59; Fri, 22 Mar 2019 10:12:09 +0000 (UTC) To: Wenzhuo Lu , dev@dpdk.org References: <1551340136-83843-1-git-send-email-wenzhuo.lu@intel.com> <1553223516-118453-1-git-send-email-wenzhuo.lu@intel.com> <1553223516-118453-7-git-send-email-wenzhuo.lu@intel.com> From: Maxime Coquelin Message-ID: Date: Fri, 22 Mar 2019 11:12:07 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.5.1 MIME-Version: 1.0 In-Reply-To: <1553223516-118453-7-git-send-email-wenzhuo.lu@intel.com> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit X-Scanned-By: MIMEDefang 2.84 on 10.5.11.23 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.29]); Fri, 22 Mar 2019 10:12:10 +0000 (UTC) Subject: Re: [dpdk-dev] [PATCH v5 6/8] net/ice: support Rx AVX2 vector X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 22 Mar 2019 10:12:11 -0000 On 3/22/19 3:58 AM, Wenzhuo Lu wrote: > Signed-off-by: Wenzhuo Lu > --- > drivers/net/ice/Makefile | 19 ++ > drivers/net/ice/ice_rxtx.c | 16 +- > drivers/net/ice/ice_rxtx.h | 2 + > drivers/net/ice/ice_rxtx_vec_avx2.c | 622 ++++++++++++++++++++++++++++++++++++ > 
drivers/net/ice/meson.build | 15 + > 5 files changed, 671 insertions(+), 3 deletions(-) > create mode 100644 drivers/net/ice/ice_rxtx_vec_avx2.c > > diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile > index 92594bb..5ba59f4 100644 > --- a/drivers/net/ice/Makefile > +++ b/drivers/net/ice/Makefile > @@ -58,4 +58,23 @@ ifeq ($(CONFIG_RTE_ARCH_X86), y) > SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c > endif > > +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) > + CC_AVX2_SUPPORT=1 > +else > + CC_AVX2_SUPPORT=\ > + $(shell $(CC) -march=core-avx2 -dM -E - &1 | \ > + grep -q AVX2 && echo 1) > + ifeq ($(CC_AVX2_SUPPORT), 1) > + ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) > + CFLAGS_ice_rxtx_vec_avx2.o += -march=core-avx2 > + else > + CFLAGS_ice_rxtx_vec_avx2.o += -mavx2 > + endif > + endif > +endif > + > +ifeq ($(CC_AVX2_SUPPORT), 1) > + SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_avx2.c > +endif > + > include $(RTE_SDK)/mk/rte.lib.mk > diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c > index f9ecffa..6191f34 100644 > --- a/drivers/net/ice/ice_rxtx.c > +++ b/drivers/net/ice/ice_rxtx.c > @@ -1494,7 +1494,8 @@ > > #ifdef RTE_ARCH_X86 > if (dev->rx_pkt_burst == ice_recv_pkts_vec || > - dev->rx_pkt_burst == ice_recv_scattered_pkts_vec) > + dev->rx_pkt_burst == ice_recv_scattered_pkts_vec || > + dev->rx_pkt_burst == ice_recv_pkts_vec_avx2) > return ptypes; > #endif > > @@ -2236,21 +2237,30 @@ void __attribute__((cold)) > #ifdef RTE_ARCH_X86 > struct ice_rx_queue *rxq; > int i; > + bool use_avx2 = false; > > if (!ice_rx_vec_dev_check(dev)) { > for (i = 0; i < dev->data->nb_rx_queues; i++) { > rxq = dev->data->rx_queues[i]; > (void)ice_rxq_vec_setup(rxq); > } > + > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 || > + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) > + use_avx2 = true; > + > if (dev->data->scattered_rx) { > PMD_DRV_LOG(DEBUG, > "Using Vector Scattered Rx (port %d).", > 
dev->data->port_id); > dev->rx_pkt_burst = ice_recv_scattered_pkts_vec; > } else { > - PMD_DRV_LOG(DEBUG, "Using Vector Rx (port %d).", > + PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).", > + use_avx2 ? "avx2 " : "", > dev->data->port_id); > - dev->rx_pkt_burst = ice_recv_pkts_vec; > + dev->rx_pkt_burst = use_avx2 ? > + ice_recv_pkts_vec_avx2 : > + ice_recv_pkts_vec; > } > > return; > diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h > index 1dde4e7..d1c9b92 100644 > --- a/drivers/net/ice/ice_rxtx.h > +++ b/drivers/net/ice/ice_rxtx.h > @@ -179,4 +179,6 @@ uint16_t ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, > uint16_t nb_pkts); > uint16_t ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, > uint16_t nb_pkts); > +uint16_t ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts); > #endif /* _ICE_RXTX_H_ */ > diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c > new file mode 100644 > index 0000000..763fa9f > --- /dev/null > +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c > @@ -0,0 +1,622 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2019 Intel Corporation > + */ > + > +#include "ice_rxtx_vec_common.h" > + > +#include > + > +#ifndef __INTEL_COMPILER > +#pragma GCC diagnostic ignored "-Wcast-qual" > +#endif > + > +static inline void > +ice_rxq_rearm(struct ice_rx_queue *rxq) > +{ > + int i; > + uint16_t rx_id; > + volatile union ice_rx_desc *rxdp; > + struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; > + > + rxdp = rxq->rx_ring + rxq->rxrearm_start; > + > + /* Pull 'n' more MBUFs into the software ring */ > + if (rte_mempool_get_bulk(rxq->mp, > + (void *)rxep, > + ICE_RXQ_REARM_THRESH) < 0) { > + if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >= > + rxq->nb_rx_desc) { > + __m128i dma_addr0; > + > + dma_addr0 = _mm_setzero_si128(); > + for (i = 0; i < ICE_DESCS_PER_LOOP; i++) { > + rxep[i].mbuf = &rxq->fake_mbuf; > + 
_mm_store_si128((__m128i *)&rxdp[i].read, > + dma_addr0); > + } > + } > + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += > + ICE_RXQ_REARM_THRESH; > + return; > + } > + > +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC I see same is done for other Intel NICs, but I wonder what would be the performance cost of making it dynamic, if any cost? Having it dynamic (as a dev arg for instance) would make it possible to change the value when the user is using dpdk from a distro. It would also help testing coverage. Btw, how do you select this option with meson build system? From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id CD8D4A00E6 for ; Fri, 22 Mar 2019 11:12:13 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BE8851B586; Fri, 22 Mar 2019 11:12:12 +0100 (CET) Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 042FD1B585 for ; Fri, 22 Mar 2019 11:12:10 +0100 (CET) Received: from smtp.corp.redhat.com (int-mx08.intmail.prod.int.phx2.redhat.com [10.5.11.23]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 5B1BA2026F; Fri, 22 Mar 2019 10:12:10 +0000 (UTC) Received: from [10.36.112.59] (ovpn-112-59.ams2.redhat.com [10.36.112.59]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 51F9B19C59; Fri, 22 Mar 2019 10:12:09 +0000 (UTC) To: Wenzhuo Lu , dev@dpdk.org References: <1551340136-83843-1-git-send-email-wenzhuo.lu@intel.com> <1553223516-118453-1-git-send-email-wenzhuo.lu@intel.com> <1553223516-118453-7-git-send-email-wenzhuo.lu@intel.com> From: Maxime Coquelin Message-ID: Date: Fri, 22 Mar 2019 11:12:07 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.5.1 MIME-Version: 1.0 In-Reply-To: <1553223516-118453-7-git-send-email-wenzhuo.lu@intel.com> Content-Type: 
text/plain; charset="UTF-8"; format="flowed" Content-Language: en-US Content-Transfer-Encoding: 7bit X-Scanned-By: MIMEDefang 2.84 on 10.5.11.23 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.29]); Fri, 22 Mar 2019 10:12:10 +0000 (UTC) Subject: Re: [dpdk-dev] [PATCH v5 6/8] net/ice: support Rx AVX2 vector X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Message-ID: <20190322101207.qWSBYaTgCjOBt_tOuJZ9UMR5tRtR_sWL2zTR6bDUK_4@z> On 3/22/19 3:58 AM, Wenzhuo Lu wrote: > Signed-off-by: Wenzhuo Lu > --- > drivers/net/ice/Makefile | 19 ++ > drivers/net/ice/ice_rxtx.c | 16 +- > drivers/net/ice/ice_rxtx.h | 2 + > drivers/net/ice/ice_rxtx_vec_avx2.c | 622 ++++++++++++++++++++++++++++++++++++ > drivers/net/ice/meson.build | 15 + > 5 files changed, 671 insertions(+), 3 deletions(-) > create mode 100644 drivers/net/ice/ice_rxtx_vec_avx2.c > > diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile > index 92594bb..5ba59f4 100644 > --- a/drivers/net/ice/Makefile > +++ b/drivers/net/ice/Makefile > @@ -58,4 +58,23 @@ ifeq ($(CONFIG_RTE_ARCH_X86), y) > SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c > endif > > +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) > + CC_AVX2_SUPPORT=1 > +else > + CC_AVX2_SUPPORT=\ > + $(shell $(CC) -march=core-avx2 -dM -E - &1 | \ > + grep -q AVX2 && echo 1) > + ifeq ($(CC_AVX2_SUPPORT), 1) > + ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) > + CFLAGS_ice_rxtx_vec_avx2.o += -march=core-avx2 > + else > + CFLAGS_ice_rxtx_vec_avx2.o += -mavx2 > + endif > + endif > +endif > + > +ifeq ($(CC_AVX2_SUPPORT), 1) > + SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_avx2.c > +endif > + > include $(RTE_SDK)/mk/rte.lib.mk > diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c > index 
f9ecffa..6191f34 100644 > --- a/drivers/net/ice/ice_rxtx.c > +++ b/drivers/net/ice/ice_rxtx.c > @@ -1494,7 +1494,8 @@ > > #ifdef RTE_ARCH_X86 > if (dev->rx_pkt_burst == ice_recv_pkts_vec || > - dev->rx_pkt_burst == ice_recv_scattered_pkts_vec) > + dev->rx_pkt_burst == ice_recv_scattered_pkts_vec || > + dev->rx_pkt_burst == ice_recv_pkts_vec_avx2) > return ptypes; > #endif > > @@ -2236,21 +2237,30 @@ void __attribute__((cold)) > #ifdef RTE_ARCH_X86 > struct ice_rx_queue *rxq; > int i; > + bool use_avx2 = false; > > if (!ice_rx_vec_dev_check(dev)) { > for (i = 0; i < dev->data->nb_rx_queues; i++) { > rxq = dev->data->rx_queues[i]; > (void)ice_rxq_vec_setup(rxq); > } > + > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 || > + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) > + use_avx2 = true; > + > if (dev->data->scattered_rx) { > PMD_DRV_LOG(DEBUG, > "Using Vector Scattered Rx (port %d).", > dev->data->port_id); > dev->rx_pkt_burst = ice_recv_scattered_pkts_vec; > } else { > - PMD_DRV_LOG(DEBUG, "Using Vector Rx (port %d).", > + PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).", > + use_avx2 ? "avx2 " : "", > dev->data->port_id); > - dev->rx_pkt_burst = ice_recv_pkts_vec; > + dev->rx_pkt_burst = use_avx2 ? 
> + ice_recv_pkts_vec_avx2 : > + ice_recv_pkts_vec; > } > > return; > diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h > index 1dde4e7..d1c9b92 100644 > --- a/drivers/net/ice/ice_rxtx.h > +++ b/drivers/net/ice/ice_rxtx.h > @@ -179,4 +179,6 @@ uint16_t ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, > uint16_t nb_pkts); > uint16_t ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, > uint16_t nb_pkts); > +uint16_t ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts); > #endif /* _ICE_RXTX_H_ */ > diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c > new file mode 100644 > index 0000000..763fa9f > --- /dev/null > +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c > @@ -0,0 +1,622 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2019 Intel Corporation > + */ > + > +#include "ice_rxtx_vec_common.h" > + > +#include > + > +#ifndef __INTEL_COMPILER > +#pragma GCC diagnostic ignored "-Wcast-qual" > +#endif > + > +static inline void > +ice_rxq_rearm(struct ice_rx_queue *rxq) > +{ > + int i; > + uint16_t rx_id; > + volatile union ice_rx_desc *rxdp; > + struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; > + > + rxdp = rxq->rx_ring + rxq->rxrearm_start; > + > + /* Pull 'n' more MBUFs into the software ring */ > + if (rte_mempool_get_bulk(rxq->mp, > + (void *)rxep, > + ICE_RXQ_REARM_THRESH) < 0) { > + if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >= > + rxq->nb_rx_desc) { > + __m128i dma_addr0; > + > + dma_addr0 = _mm_setzero_si128(); > + for (i = 0; i < ICE_DESCS_PER_LOOP; i++) { > + rxep[i].mbuf = &rxq->fake_mbuf; > + _mm_store_si128((__m128i *)&rxdp[i].read, > + dma_addr0); > + } > + } > + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += > + ICE_RXQ_REARM_THRESH; > + return; > + } > + > +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC I see same is done for other Intel NICs, but I wonder what would be the performance cost of making it 
dynamic, if any? Having it dynamic (as a dev arg, for instance) would make it possible to change the value when the user is using DPDK from a distro. It would also help test coverage. Btw, how do you select this option with the meson build system?