From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id B37174C91 for ; Wed, 12 Sep 2018 12:20:15 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 12 Sep 2018 03:20:14 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.53,364,1531810800"; d="scan'208";a="79839383" Received: from dpdk-xiaoyun3.sh.intel.com ([10.67.119.41]) by FMSMGA003.fm.intel.com with ESMTP; 12 Sep 2018 03:20:13 -0700 From: Xiaoyun Li To: beilei.xing@intel.com, qi.z.zhang@intel.com Cc: dev@dpdk.org, zhiyong.yang@intel.com, Xiaoyun Li Date: Wed, 12 Sep 2018 18:12:02 +0800 Message-Id: <20180912101202.37950-1-xiaoyun.li@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <1535595399-430873-1-git-send-email-xiaoyun.li@intel.com> References: <1535595399-430873-1-git-send-email-xiaoyun.li@intel.com> Subject: [dpdk-dev] [PATCH v5] net/i40e: add interface to choose latest vector path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 12 Sep 2018 10:20:16 -0000 Right now, vector path is limited to only use on later platform. This patch adds a devarg use-latest-vec to allow the users to use the latest vector path that the platform supported. Namely, using AVX2 vector path on broadwell is possible. Signed-off-by: Xiaoyun Li --- v5: * Simpify the rx set function. v4: * Polish the codes. v3: * Polish the doc and commit log. v2: * Correct the calling of the wrong function last time. * Fix seg fault bug. --- doc/guides/nics/i40e.rst | 8 ++ doc/guides/rel_notes/release_18_11.rst | 4 + drivers/net/i40e/i40e_ethdev.c | 46 +++++++- drivers/net/i40e/i40e_ethdev.h | 3 + drivers/net/i40e/i40e_rxtx.c | 143 +++++++++++++------------ 5 files changed, 136 insertions(+), 68 deletions(-) diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index 65d87f869..643e6a062 100644 --- a/doc/guides/nics/i40e.rst +++ b/doc/guides/nics/i40e.rst @@ -163,6 +163,14 @@ Runtime Config Options Currently hot-plugging of representor ports is not supported so all required representors must be specified on the creation of the PF. +- ``Use latest vector`` (default ``disable``) + + Vector path was limited to use only on later platform. But users may want the + latest vector path. For example, VPP users may want to use AVX2 vector path on HSW/BDW + because it can get better perf. So ``devargs`` parameter ``use-latest-vec`` is + introduced, for example:: + -w 84:00.0,use-latest-vec=1 + Driver compilation and testing ------------------------------ diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst index 3ae6b3f58..34af591a2 100644 --- a/doc/guides/rel_notes/release_18_11.rst +++ b/doc/guides/rel_notes/release_18_11.rst @@ -54,6 +54,10 @@ New Features Also, make sure to start the actual text at the margin. ========================================================= +* **Added a devarg to use the latest vector path.** + A new devarg ``use-latest-vec`` was introduced to allow users to choose + the latest vector path that the platform supported. For example, VPP users + can use AVX2 vector path on BDW/HSW to get better performance. API Changes ----------- diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 85a6a867f..72377d0b6 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -44,6 +44,7 @@ #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list" #define ETH_I40E_SUPPORT_MULTI_DRIVER "support-multi-driver" #define ETH_I40E_QUEUE_NUM_PER_VF_ARG "queue-num-per-vf" +#define ETH_I40E_USE_LATEST_VEC "use-latest-vec" #define I40E_CLEAR_PXE_WAIT_MS 200 @@ -408,6 +409,7 @@ static const char *const valid_keys[] = { ETH_I40E_FLOATING_VEB_LIST_ARG, ETH_I40E_SUPPORT_MULTI_DRIVER, ETH_I40E_QUEUE_NUM_PER_VF_ARG, + ETH_I40E_USE_LATEST_VEC, NULL}; static const struct rte_pci_id pci_id_i40e_map[] = { @@ -1201,6 +1203,46 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw, return i40e_aq_debug_write_register(hw, reg_addr, reg_val, cmd_details); } +static int +i40e_parse_latest_vec(struct rte_eth_dev *dev) +{ + struct i40e_adapter *ad = + I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + int kvargs_count, use_latest_vec; + struct rte_kvargs *kvlist; + + ad->use_latest_vec = false; + + if (!dev->device->devargs) + return 0; + + kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys); + if (!kvlist) + return -EINVAL; + + kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC); + if (!kvargs_count) { + rte_kvargs_free(kvlist); + return 0; + } + + if (kvargs_count > 1) + PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only " + "the first one is used !", + ETH_I40E_USE_LATEST_VEC); + + use_latest_vec = atoi((&kvlist->pairs[0])->value); + + rte_kvargs_free(kvlist); + + if (use_latest_vec != 0 && use_latest_vec != 1) + PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!"); + + ad->use_latest_vec = (bool)use_latest_vec; + + return 0; +} + static int eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) { @@ -1263,6 +1305,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) /* Check if need to support multi-driver */ i40e_support_multi_driver(dev); + i40e_parse_latest_vec(dev); /* Make sure all is clean before doing PF reset */ i40e_clear_hw(hw); @@ -12527,4 +12570,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e, ETH_I40E_FLOATING_VEB_ARG "=1" ETH_I40E_FLOATING_VEB_LIST_ARG "=" ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16" - ETH_I40E_SUPPORT_MULTI_DRIVER "=1"); + ETH_I40E_SUPPORT_MULTI_DRIVER "=1" + ETH_I40E_USE_LATEST_VEC "=1"); diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h index 3fffe5a55..140c92b84 100644 --- a/drivers/net/i40e/i40e_ethdev.h +++ b/drivers/net/i40e/i40e_ethdev.h @@ -1078,6 +1078,9 @@ struct i40e_adapter { uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned; uint64_t flow_types_mask; uint64_t pctypes_mask; + + /* For devargs */ + bool use_latest_vec; }; /** diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 2a28ee348..937dfdf3e 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -2909,6 +2909,35 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, qinfo->conf.offloads = txq->offloads; } +static eth_rx_burst_t +i40e_get_latest_rx_vec(bool scatter) +{ +#ifdef RTE_ARCH_X86 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + return scatter ? i40e_recv_scattered_pkts_vec_avx2 : + i40e_recv_pkts_vec_avx2; +#endif + return scatter ? i40e_recv_scattered_pkts_vec : + i40e_recv_pkts_vec; +} + +static eth_rx_burst_t +i40e_get_recommend_rx_vec(bool scatter) +{ +#ifdef RTE_ARCH_X86 + /* + * since AVX frequency can be different to base frequency, limit + * use of AVX2 version to later plaforms, not all those that could + * theoretically run it. + */ + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) + return scatter ? i40e_recv_scattered_pkts_vec_avx2 : + i40e_recv_pkts_vec_avx2; +#endif + return scatter ? i40e_recv_scattered_pkts_vec : + i40e_recv_pkts_vec; +} + void __attribute__((cold)) i40e_set_rx_function(struct rte_eth_dev *dev) { @@ -2940,57 +2969,17 @@ i40e_set_rx_function(struct rte_eth_dev *dev) } } - if (dev->data->scattered_rx) { - /* Set the non-LRO scattered callback: there are Vector and - * single allocation versions. - */ - if (ad->rx_vec_allowed) { - PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx " - "callback (port=%d).", - dev->data->port_id); - - dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec; -#ifdef RTE_ARCH_X86 - /* - * since AVX frequency can be different to base - * frequency, limit use of AVX2 version to later - * plaforms, not all those that could theoretically - * run it. - */ - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) - dev->rx_pkt_burst = - i40e_recv_scattered_pkts_vec_avx2; -#endif - } else { - PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk " - "allocation callback (port=%d).", - dev->data->port_id); - dev->rx_pkt_burst = i40e_recv_scattered_pkts; - } - /* If parameters allow we are going to choose between the following - * callbacks: - * - Vector - * - Bulk Allocation - * - Single buffer allocation (the simplest one) - */ - } else if (ad->rx_vec_allowed) { - PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX " - "burst size no less than %d (port=%d).", - RTE_I40E_DESCS_PER_LOOP, - dev->data->port_id); - - dev->rx_pkt_burst = i40e_recv_pkts_vec; -#ifdef RTE_ARCH_X86 - /* - * since AVX frequency can be different to base - * frequency, limit use of AVX2 version to later - * plaforms, not all those that could theoretically - * run it. - */ - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) - dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2; -#endif - } else if (ad->rx_bulk_alloc_allowed) { + if (ad->rx_vec_allowed) { + /* Vec Rx path */ + PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.", + dev->data->port_id); + if (ad->use_latest_vec) + dev->rx_pkt_burst = + i40e_get_latest_rx_vec(dev->data->scattered_rx); + else + dev->rx_pkt_burst = + i40e_get_recommend_rx_vec(dev->data->scattered_rx); + } else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) { PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are " "satisfied. Rx Burst Bulk Alloc function " "will be used on port=%d.", @@ -2998,12 +2987,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev) dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc; } else { - PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not " - "satisfied, or Scattered Rx is requested " - "(port=%d).", + /* Simple Rx Path. */ + PMD_INIT_LOG(DEBUG, "Simple Rx path will be used on port=%d.", dev->data->port_id); - - dev->rx_pkt_burst = i40e_recv_pkts; + dev->rx_pkt_burst = dev->data->scattered_rx ? + i40e_recv_scattered_pkts : + i40e_recv_pkts; } /* Propagate information about RX function choice through all queues. */ @@ -3049,6 +3038,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq) txq->queue_id); } +static eth_tx_burst_t +i40e_get_latest_tx_vec(void) +{ +#ifdef RTE_ARCH_X86 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + return i40e_xmit_pkts_vec_avx2; +#endif + return i40e_xmit_pkts_vec; +} + +static eth_tx_burst_t +i40e_get_recommend_tx_vec(void) +{ +#ifdef RTE_ARCH_X86 + /* + * since AVX frequency can be different to base frequency, limit + * use of AVX2 version to later plaforms, not all those that could + * theoretically run it. + */ + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) + return i40e_xmit_pkts_vec_avx2; +#endif + return i40e_xmit_pkts_vec; +} + void __attribute__((cold)) i40e_set_tx_function(struct rte_eth_dev *dev) { @@ -3073,17 +3087,12 @@ i40e_set_tx_function(struct rte_eth_dev *dev) if (ad->tx_simple_allowed) { if (ad->tx_vec_allowed) { PMD_INIT_LOG(DEBUG, "Vector tx finally be used."); - dev->tx_pkt_burst = i40e_xmit_pkts_vec; -#ifdef RTE_ARCH_X86 - /* - * since AVX frequency can be different to base - * frequency, limit use of AVX2 version to later - * plaforms, not all those that could theoretically - * run it. - */ - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) - dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2; -#endif + if (ad->use_latest_vec) + dev->tx_pkt_burst = + i40e_get_latest_tx_vec(); + else + dev->tx_pkt_burst = + i40e_get_recommend_tx_vec(); } else { PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; -- 2.17.1