DPDK patches and discussions
 help / color / Atom feed
* [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
@ 2020-08-07 15:58 Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 01/12] eal: add max SIMD bitwidth Ciara Power
                   ` (21 more replies)
  0 siblings, 22 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power

A number of components in DPDK have optional AVX-512 or other vector
code paths which can be selected at runtime. Rather than having each
component provide its own mechanism to select a code path, this patchset
adds support for a single setting to control what code paths are used.
This can be used to enable some non-default code paths e.g. ones using
AVX-512, but also to limit the code paths to certain vector widths, or
to scalar code only, which is useful for testing.

The max SIMD bitwidth setting can be set by the app itself through use of
the available API, or can be overridden by a command-line argument passed
by the user.

Ciara Power (12):
  eal: add max SIMD bitwidth
  eal: add default SIMD bitwidth values
  net/i40e: add checks for max SIMD bitwidth
  net/axgbe: add checks for max SIMD bitwidth
  net/bnxt: add checks for max SIMD bitwidth
  net/enic: add checks for max SIMD bitwidth
  net/fm10k: add checks for max SIMD bitwidth
  net/iavf: add checks for max SIMD bitwidth
  net/ice: add checks for max SIMD bitwidth
  net/ixgbe: add checks for max SIMD bitwidth
  net/mlx5: add checks for max SIMD bitwidth
  net/virtio: add checks for max SIMD bitwidth

 drivers/net/axgbe/axgbe_rxtx.c             |  3 +-
 drivers/net/bnxt/bnxt_ethdev.c             |  6 ++-
 drivers/net/enic/enic_rxtx_vec_avx2.c      |  3 +-
 drivers/net/fm10k/fm10k_ethdev.c           | 11 ++--
 drivers/net/i40e/i40e_rxtx.c               | 19 ++++---
 drivers/net/iavf/iavf_rxtx.c               | 16 +++---
 drivers/net/ice/ice_rxtx.c                 | 20 ++++---
 drivers/net/ixgbe/ixgbe_rxtx.c             |  7 ++-
 drivers/net/mlx5/mlx5_ethdev.c             |  3 +-
 drivers/net/virtio/virtio_ethdev.c         | 12 +++--
 lib/librte_eal/arm/include/rte_vect.h      |  2 +
 lib/librte_eal/common/eal_common_options.c | 63 ++++++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/include/generic/rte_vect.h  |  2 +
 lib/librte_eal/include/rte_eal.h           | 31 +++++++++++
 lib/librte_eal/ppc/include/rte_vect.h      |  2 +
 lib/librte_eal/rte_eal_version.map         |  4 ++
 lib/librte_eal/x86/include/rte_vect.h      |  2 +
 19 files changed, 184 insertions(+), 32 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 01/12] eal: add max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
@ 2020-08-07 15:58 ` Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values Ciara Power
                   ` (20 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power

This patch adds a max SIMD bitwidth EAL configuration. The API allows
for an app to set this value. It can also be set using EAL argument
--force-max-simd-bitwidth, which will lock the value and override any
modifications made by the app.

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_eal/common/eal_common_options.c | 60 ++++++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/include/rte_eal.h           | 31 +++++++++++
 lib/librte_eal/rte_eal_version.map         |  4 ++
 5 files changed, 105 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index a5426e1234..90f4e8f5c3 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -102,6 +102,7 @@ eal_long_options[] = {
 	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
+	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
 	{0,                     0, NULL, 0                        }
 };
 
@@ -1309,6 +1310,32 @@ eal_parse_iova_mode(const char *name)
 	return 0;
 }
 
+static int
+eal_parse_simd_bitwidth(const char *arg, bool locked)
+{
+	char *end;
+	uint16_t bitwidth;
+	int ret;
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	bitwidth = strtoul(arg, &end, 0);
+
+	/* check for errors */
+	if ((errno != 0) || end == NULL || (*end != '\0'))
+		return -1;
+
+	ret = rte_set_max_simd_bitwidth(bitwidth);
+	if (ret < 0)
+		return -1;
+	internal_conf->max_simd_bitwidth.locked = locked;
+	return 0;
+}
+
 static int
 eal_parse_base_virtaddr(const char *arg)
 {
@@ -1707,6 +1734,13 @@ eal_parse_common_option(int opt, const char *optarg,
 	case OPT_NO_TELEMETRY_NUM:
 		conf->no_telemetry = 1;
 		break;
+	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
+		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
+			return -1;
+		}
+		break;
 
 	/* don't know what to do, leave this to caller */
 	default:
@@ -1903,6 +1937,31 @@ eal_check_common_options(struct internal_config *internal_cfg)
 	return 0;
 }
 
+uint16_t
+rte_get_max_simd_bitwidth(void)
+{
+	const struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	return internal_conf->max_simd_bitwidth.bitwidth;
+}
+
+int
+rte_set_max_simd_bitwidth(uint16_t bitwidth)
+{
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	if (internal_conf->max_simd_bitwidth.locked) {
+		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
+		return -EPERM;
+	}
+	if (bitwidth < RTE_NO_SIMD || !rte_is_power_of_2(bitwidth)) {
+		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
+		return -EINVAL;
+	}
+	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
+	return 0;
+}
+
 void
 eal_common_usage(void)
 {
@@ -1981,6 +2040,7 @@ eal_common_usage(void)
 	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
 	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
 	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
+	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
 	       "\nEAL options for DEBUG use only:\n"
 	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
 	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 13f93388a7..367e0cc19e 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -33,6 +33,12 @@ struct hugepage_info {
 	int lock_descriptor;    /**< file descriptor for hugepage dir */
 };
 
+struct simd_bitwidth {
+	/**< flag indicating if bitwidth is locked from further modification */
+	bool locked;
+	uint16_t bitwidth; /**< bitwidth value */
+};
+
 /**
  * internal configuration
  */
@@ -85,6 +91,8 @@ struct internal_config {
 	volatile unsigned int init_complete;
 	/**< indicates whether EAL has completed initialization */
 	unsigned int no_telemetry; /**< true to disable Telemetry */
+	/** max simd bitwidth path to use */
+	struct simd_bitwidth max_simd_bitwidth;
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 89769d48b4..ef33979664 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -85,6 +85,8 @@ enum {
 	OPT_TELEMETRY_NUM,
 #define OPT_NO_TELEMETRY      "no-telemetry"
 	OPT_NO_TELEMETRY_NUM,
+#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
+	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index ddcf6a2e7a..14048fdb74 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -43,6 +43,13 @@ enum rte_proc_type_t {
 	RTE_PROC_INVALID
 };
 
+enum rte_max_simd_t {
+	RTE_NO_SIMD = 64,
+	RTE_MAX_128_SIMD = 128,
+	RTE_MAX_256_SIMD = 256,
+	RTE_MAX_512_SIMD = 512
+};
+
 /**
  * Get the process type in a multi-process setup
  *
@@ -51,6 +58,30 @@ enum rte_proc_type_t {
  */
 enum rte_proc_type_t rte_eal_process_type(void);
 
+/**
+ * Get the supported SIMD bitwidth.
+ *
+ * @return
+ *   uint16_t bitwidth.
+ */
+__rte_experimental
+uint16_t rte_get_max_simd_bitwidth(void);
+
+/**
+ * Set the supported SIMD bitwidth.
+ *
+ * @param bitwidth
+ *   uint16_t bitwidth.
+ * @return
+ *   0 on success.
+ * @return
+ *   -EINVAL on invalid bitwidth parameter.
+ * @return
+ *   -EPERM if bitwidth is locked.
+ */
+__rte_experimental
+int rte_set_max_simd_bitwidth(uint16_t bitwidth);
+
 /**
  * Request iopl privilege for all RPL.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index bf0c17c233..8059ea76b6 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -403,6 +403,10 @@ EXPERIMENTAL {
 	rte_mp_disable;
 	rte_thread_register;
 	rte_thread_unregister;
+
+	# added in 20.11
+	rte_get_max_simd_bitwidth;
+	rte_set_max_simd_bitwidth;
 };
 
 INTERNAL {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 01/12] eal: add max SIMD bitwidth Ciara Power
@ 2020-08-07 15:58 ` Ciara Power
  2020-08-07 16:31   ` David Christensen
  2020-08-10  5:22   ` Ruifeng Wang
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 03/12] net/i40e: add checks for max SIMD bitwidth Ciara Power
                   ` (19 subsequent siblings)
  21 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev
  Cc: bruce.richardson, Ciara Power, Ruifeng Wang, Jerin Jacob,
	Honnappa Nagarahalli, David Christensen

Each arch has a define for the default SIMD bitwidth value. This define is
used on EAL init to set the config max SIMD bitwidth.

Cc: Ruifeng Wang <ruifeng.wang@arm.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Cc: David Christensen <drc@linux.vnet.ibm.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_eal/arm/include/rte_vect.h      | 2 ++
 lib/librte_eal/common/eal_common_options.c | 3 +++
 lib/librte_eal/include/generic/rte_vect.h  | 2 ++
 lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
 lib/librte_eal/x86/include/rte_vect.h      | 2 ++
 5 files changed, 11 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index 01c51712a1..7487a53862 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -14,6 +14,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef int32x4_t xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 90f4e8f5c3..c2a9624f89 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -35,6 +35,7 @@
 #ifndef RTE_EXEC_ENV_WINDOWS
 #include <rte_telemetry.h>
 #endif
+#include <rte_vect.h>
 
 #include "eal_internal_cfg.h"
 #include "eal_options.h"
@@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
 	CPU_ZERO(&internal_cfg->ctrl_cpuset);
 	internal_cfg->init_complete = 0;
+	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
+	internal_cfg->max_simd_bitwidth.locked = 0;
 }
 
 static int
diff --git a/lib/librte_eal/include/generic/rte_vect.h b/lib/librte_eal/include/generic/rte_vect.h
index 3fc47979f8..e98f184a97 100644
--- a/lib/librte_eal/include/generic/rte_vect.h
+++ b/lib/librte_eal/include/generic/rte_vect.h
@@ -14,6 +14,8 @@
 
 #include <stdint.h>
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 /* Unsigned vector types */
 
 /**
diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
index b0545c878c..70fbd0c423 100644
--- a/lib/librte_eal/ppc/include/rte_vect.h
+++ b/lib/librte_eal/ppc/include/rte_vect.h
@@ -15,6 +15,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef vector signed int xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
index df5a607623..b1df75aca7 100644
--- a/lib/librte_eal/x86/include/rte_vect.h
+++ b/lib/librte_eal/x86/include/rte_vect.h
@@ -35,6 +35,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef __m128i xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 03/12] net/i40e: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 01/12] eal: add max SIMD bitwidth Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values Ciara Power
@ 2020-08-07 15:58 ` Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 04/12] net/axgbe: " Ciara Power
                   ` (18 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Beilei Xing, Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Beilei Xing <beilei.xing@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index fe7f9200c1..90f4e26fb8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3098,7 +3098,8 @@ static eth_rx_burst_t
 i40e_get_latest_rx_vec(bool scatter)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 	}
 
-	if (ad->rx_vec_allowed) {
+	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
+			>= RTE_MAX_128_SIMD) {
 		/* Vec Rx path */
 		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
 				dev->data->port_id);
@@ -3268,7 +3271,8 @@ static eth_tx_burst_t
 i40e_get_latest_tx_vec(void)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 	}
 
 	if (ad->tx_simple_allowed) {
-		if (ad->tx_vec_allowed) {
+		if (ad->tx_vec_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
 			if (ad->use_latest_vec)
 				dev->tx_pkt_burst =
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 04/12] net/axgbe: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (2 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 03/12] net/i40e: add checks for max SIMD bitwidth Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 17:49   ` Somalapuram, Amaranath
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 05/12] net/bnxt: " Ciara Power
                   ` (17 subsequent siblings)
  21 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Somalapuram Amaranath

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index 30c467db71..6200954caa 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -553,7 +553,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!pdata->tx_queues)
 		pdata->tx_queues = dev->data->tx_queues;
 
-	if (txq->vector_disable)
+	if (txq->vector_disable || rte_get_max_simd_bitwidth()
+			< RTE_MAX_128_SIMD)
 		dev->tx_pkt_burst = &axgbe_xmit_pkts;
 	else
 #ifdef RTE_ARCH_X86
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 05/12] net/bnxt: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (3 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 04/12] net/axgbe: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 06/12] net/enic: " Ciara Power
                   ` (16 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Ajit Khaparde, Somnath Kotur

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/bnxt/bnxt_ethdev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 510a0d9e0a..626aae8881 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1100,7 +1100,8 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
 		DEV_RX_OFFLOAD_RSS_HASH |
 		DEV_RX_OFFLOAD_VLAN_FILTER)) &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) && rte_get_max_simd_bitwidth()
+			>= RTE_MAX_128_SIMD) {
 		PMD_DRV_LOG(INFO, "Using vector mode receive for port %d\n",
 			    eth_dev->data->port_id);
 		bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
@@ -1132,7 +1133,8 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 	 */
 	if (!eth_dev->data->scattered_rx &&
 	    !eth_dev->data->dev_conf.txmode.offloads &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) &&
+	    rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		PMD_DRV_LOG(INFO, "Using vector mode transmit for port %d\n",
 			    eth_dev->data->port_id);
 		return bnxt_xmit_pkts_vec;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 06/12] net/enic: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (4 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 05/12] net/bnxt: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-10  4:50   ` Hyong Youb Kim (hyonkim)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 07/12] net/fm10k: " Ciara Power
                   ` (15 subsequent siblings)
  21 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, John Daley, Hyong Youb Kim

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: John Daley <johndale@cisco.com>
Cc: Hyong Youb Kim <hyonkim@cisco.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/enic/enic_rxtx_vec_avx2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
index 676b9f5fdb..5db43bdbb8 100644
--- a/drivers/net/enic/enic_rxtx_vec_avx2.c
+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
@@ -821,7 +821,8 @@ enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
 	fconf = &eth_dev->data->dev_conf.fdir_conf;
 	if (fconf->mode != RTE_FDIR_MODE_NONE)
 		return false;
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD) {
 		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
 		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
 		enic->use_noscatter_vec_rx_handler = 1;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 07/12] net/fm10k: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (5 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 06/12] net/enic: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 08/12] net/iavf: " Ciara Power
                   ` (14 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Qi Zhang, Xiao Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qi Zhang <qi.z.zhang@intel.com>
Cc: Xiao Wang <xiao.w.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index b574693bca..f7c41d4377 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2937,7 +2937,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* primary process has set the ftag flag and offloads */
 		txq = dev->data->tx_queues[0];
-		if (fm10k_tx_vec_condition_check(txq)) {
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth()
+				< RTE_MAX_128_SIMD) {
 			dev->tx_pkt_burst = fm10k_xmit_pkts;
 			dev->tx_pkt_prepare = fm10k_prep_pkts;
 			PMD_INIT_LOG(DEBUG, "Use regular Tx func");
@@ -2956,7 +2958,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 		txq = dev->data->tx_queues[i];
 		txq->tx_ftag_en = tx_ftag_en;
 		/* Check if Vector Tx is satisfied */
-		if (fm10k_tx_vec_condition_check(txq))
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD)
 			use_sse = 0;
 	}
 
@@ -2990,7 +2993,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met.
 	 */
 	if (!fm10k_rx_vec_condition_check(dev) &&
-			dev_info->rx_vec_allowed && !rx_ftag_en) {
+			dev_info->rx_vec_allowed && !rx_ftag_en &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 		if (dev->data->scattered_rx)
 			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
 		else
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 08/12] net/iavf: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (6 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 07/12] net/fm10k: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 09/12] net/ice: " Ciara Power
                   ` (13 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Jingjing Wu, Beilei Xing

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Jingjing Wu <jingjing.wu@intel.com>
Cc: Beilei Xing <beilei.xing@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 05a7dd898a..b798d082a2 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2105,14 +2105,16 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_rx_vec_dev_check(dev)) {
+	if (!iavf_rx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
 			rxq = dev->data->rx_queues[i];
 			(void)iavf_rxq_vec_setup(rxq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 			use_avx2 = true;
 
 		if (dev->data->scattered_rx) {
@@ -2178,7 +2180,8 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_tx_vec_dev_check(dev)) {
+	if (!iavf_tx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
 			txq = dev->data->tx_queues[i];
 			if (!txq)
@@ -2186,8 +2189,9 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 			iavf_txq_vec_setup(txq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 			use_avx2 = true;
 
 		PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 09/12] net/ice: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (7 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 08/12] net/iavf: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 10/12] net/ixgbe: " Ciara Power
                   ` (12 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Qiming Yang, Qi Zhang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qiming Yang <qiming.yang@intel.com>
Cc: Qi Zhang <qi.z.zhang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 2e1f06d2c0..eda2d9a8c7 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2889,7 +2889,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
+		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			ad->rx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
 				rxq = dev->data->rx_queues[i];
@@ -2899,8 +2901,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_MAX_256_SIMD)
 				use_avx2 = true;
 
 		} else {
@@ -3067,7 +3071,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_tx_vec_dev_check(dev)) {
+		if (!ice_tx_vec_dev_check(dev) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			ad->tx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_tx_queues; i++) {
 				txq = dev->data->tx_queues[i];
@@ -3077,8 +3083,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_MAX_256_SIMD)
 				use_avx2 = true;
 
 		} else {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 10/12] net/ixgbe: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (8 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 09/12] net/ice: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 11/12] net/mlx5: " Ciara Power
                   ` (11 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson, Ciara Power, Wei Zhao, Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Wei Zhao <wei.zhao1@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 977ecf5137..eadc7183f2 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 		dev->tx_pkt_prepare = NULL;
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
-					ixgbe_txq_vec_setup(txq) == 0)) {
+					ixgbe_txq_vec_setup(txq) == 0) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
 			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
 		} else
@@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met and Rx Bulk Allocation should be allowed.
 	 */
 	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
-	    !adapter->rx_bulk_alloc_allowed) {
+	    !adapter->rx_bulk_alloc_allowed ||
+			rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
 		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
 				    "preconditions",
 			     dev->data->port_id);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 11/12] net/mlx5: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (9 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 10/12] net/ixgbe: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-10 17:26   ` Alexander Kozyrev
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 12/12] net/virtio: " Ciara Power
                   ` (10 subsequent siblings)
  21 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev
  Cc: bruce.richardson, Ciara Power, Matan Azrad, Shahaf Shuler,
	Viacheslav Ovsiienko

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Matan Azrad <matan@mellanox.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>
Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/mlx5/mlx5_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index cefb45064e..f322f82029 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -479,7 +479,8 @@ mlx5_select_rx_function(struct rte_eth_dev *dev)
 	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;
 
 	MLX5_ASSERT(dev != NULL);
-	if (mlx5_check_vec_rx_support(dev) > 0) {
+	if (mlx5_check_vec_rx_support(dev) > 0 &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		rx_pkt_burst = mlx5_rx_burst_vec;
 		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
 			dev->data->port_id);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH 20.11 12/12] net/virtio: add checks for max SIMD bitwidth
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (10 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 11/12] net/mlx5: " Ciara Power
@ 2020-08-07 15:58 ` " Ciara Power
  2020-08-07 16:19 ` [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Stephen Hemminger
                   ` (9 subsequent siblings)
  21 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-07 15:58 UTC (permalink / raw)
  To: dev
  Cc: bruce.richardson, Ciara Power, Maxime Coquelin, Chenbo Xia, Zhihong Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Chenbo Xia <chenbo.xia@intel.com>
Cc: Zhihong Wang <zhihong.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index dc0093bdf0..f779ce8396 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1517,9 +1517,11 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 	if (vtpci_packed_queue(hw)) {
 		PMD_INIT_LOG(INFO,
 			"virtio: using packed ring %s Tx path on port %u",
-			hw->use_vec_tx ? "vectorized" : "standard",
+			(hw->use_vec_tx && rte_get_max_simd_bitwidth()
+			> RTE_MAX_256_SIMD) ? "vectorized" : "standard",
 			eth_dev->data->port_id);
-		if (hw->use_vec_tx)
+		if (hw->use_vec_tx && rte_get_max_simd_bitwidth()
+				> RTE_MAX_256_SIMD)
 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
 		else
 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
@@ -1536,7 +1538,8 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 	}
 
 	if (vtpci_packed_queue(hw)) {
-		if (hw->use_vec_rx) {
+		if (hw->use_vec_rx && rte_get_max_simd_bitwidth()
+				> RTE_MAX_256_SIMD) {
 			PMD_INIT_LOG(INFO,
 				"virtio: using packed ring vectorized Rx path on port %u",
 				eth_dev->data->port_id);
@@ -1555,7 +1558,8 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
 		}
 	} else {
-		if (hw->use_vec_rx) {
+		if (hw->use_vec_rx && rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
 				eth_dev->data->port_id);
 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (11 preceding siblings ...)
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 12/12] net/virtio: " Ciara Power
@ 2020-08-07 16:19 ` Stephen Hemminger
  2020-08-10  9:52   ` Power, Ciara
  2020-08-11  5:36 ` Honnappa Nagarahalli
                   ` (8 subsequent siblings)
  21 siblings, 1 reply; 276+ messages in thread
From: Stephen Hemminger @ 2020-08-07 16:19 UTC (permalink / raw)
  To: Ciara Power; +Cc: dev, bruce.richardson

On Fri,  7 Aug 2020 16:58:47 +0100
Ciara Power <ciara.power@intel.com> wrote:

> A number of components in DPDK have optional AVX-512 or other vector
> code paths which can be selected at runtime. Rather than having each
> component provide its own mechanism to select a code path, this patchset
> adds support for a single setting to control what code paths are used.
> This can be used to enable some non-default code paths e.g. ones using
> AVX-512, but also to limit the code paths to certain vector widths, or
> to scalar code only, which is useful for testing.
> 
> The max SIMD bitwidth setting can be set by the app itself through use of
> the available API, or can be overridden by a commandline argument passed by
> the user.
> 
> Ciara Power (12):
>   eal: add max SIMD bitwidth
>   eal: add default SIMD bitwidth values
>   net/i40e: add checks for max SIMD bitwidth
>   net/axgbe: add checks for max SIMD bitwidth
>   net/bnxt: add checks for max SIMD bitwidth
>   net/enic: add checks for max SIMD bitwidth
>   net/fm10k: add checks for max SIMD bitwidth
>   net/iavf: add checks for max SIMD bitwidth
>   net/ice: add checks for max SIMD bitwidth
>   net/ixgbe: add checks for max SIMD bitwidth
>   net/mlx5: add checks for max SIMD bitwidth
>   net/virtio: add checks for max SIMD bitwidth
> 
>  drivers/net/axgbe/axgbe_rxtx.c             |  3 +-
>  drivers/net/bnxt/bnxt_ethdev.c             |  6 ++-
>  drivers/net/enic/enic_rxtx_vec_avx2.c      |  3 +-
>  drivers/net/fm10k/fm10k_ethdev.c           | 11 ++--
>  drivers/net/i40e/i40e_rxtx.c               | 19 ++++---
>  drivers/net/iavf/iavf_rxtx.c               | 16 +++---
>  drivers/net/ice/ice_rxtx.c                 | 20 ++++---
>  drivers/net/ixgbe/ixgbe_rxtx.c             |  7 ++-
>  drivers/net/mlx5/mlx5_ethdev.c             |  3 +-
>  drivers/net/virtio/virtio_ethdev.c         | 12 +++--
>  lib/librte_eal/arm/include/rte_vect.h      |  2 +
>  lib/librte_eal/common/eal_common_options.c | 63 ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/generic/rte_vect.h  |  2 +
>  lib/librte_eal/include/rte_eal.h           | 31 +++++++++++
>  lib/librte_eal/ppc/include/rte_vect.h      |  2 +
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  lib/librte_eal/x86/include/rte_vect.h      |  2 +
>  19 files changed, 184 insertions(+), 32 deletions(-)
> 

This looks useful. Could you add some documentation on the rationale
and on how you expect applications to set it?

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values Ciara Power
@ 2020-08-07 16:31   ` David Christensen
  2020-08-07 16:59     ` David Christensen
  2020-08-12 11:28     ` Power, Ciara
  2020-08-10  5:22   ` Ruifeng Wang
  1 sibling, 2 replies; 276+ messages in thread
From: David Christensen @ 2020-08-07 16:31 UTC (permalink / raw)
  To: Ciara Power, dev
  Cc: bruce.richardson, Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli

On 8/7/20 8:58 AM, Ciara Power wrote:
> Each arch has a define for the default SIMD bitwidth value, this is used
> on EAL init to set the config max SIMD bitwidth.

What's the intended use case?

> 
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>   lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>   lib/librte_eal/common/eal_common_options.c | 3 +++
>   lib/librte_eal/include/generic/rte_vect.h  | 2 ++
>   lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>   lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>   5 files changed, 11 insertions(+)
> 
> diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
> index 01c51712a1..7487a53862 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -14,6 +14,8 @@
>   extern "C" {
>   #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>   typedef int32x4_t xmm_t;
> 
>   #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index 90f4e8f5c3..c2a9624f89 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -35,6 +35,7 @@
>   #ifndef RTE_EXEC_ENV_WINDOWS
>   #include <rte_telemetry.h>
>   #endif
> +#include <rte_vect.h>
> 
>   #include "eal_internal_cfg.h"
>   #include "eal_options.h"
> @@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
>   	internal_cfg->user_mbuf_pool_ops_name = NULL;
>   	CPU_ZERO(&internal_cfg->ctrl_cpuset);
>   	internal_cfg->init_complete = 0;
> +	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
> +	internal_cfg->max_simd_bitwidth.locked = 0;
>   }

Build error on HEAD:

../lib/librte_eal/common/eal_common_options.c: In function 
‘eal_reset_internal_config’:
../lib/librte_eal/common/eal_common_options.c:347:14: error: ‘struct 
internal_config’ has no member named ‘max_simd_bitwidth’
   internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
               ^~
../lib/librte_eal/common/eal_common_options.c:348:14: error: ‘struct 
internal_config’ has no member named ‘max_simd_bitwidth’
   internal_cfg->max_simd_bitwidth.locked = 0;
               ^~

Dave

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
  2020-08-07 16:31   ` David Christensen
@ 2020-08-07 16:59     ` David Christensen
  2020-08-12 11:28     ` Power, Ciara
  1 sibling, 0 replies; 276+ messages in thread
From: David Christensen @ 2020-08-07 16:59 UTC (permalink / raw)
  To: Ciara Power, dev
  Cc: bruce.richardson, Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli



On 8/7/20 9:31 AM, David Christensen wrote:
> On 8/7/20 8:58 AM, Ciara Power wrote:
>> Each arch has a define for the default SIMD bitwidth value, this is used
>> on EAL init to set the config max SIMD bitwidth.
> 
> What's the intended use case?
> 
>>
>> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
>> Cc: Jerin Jacob <jerinj@marvell.com>
>> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>> Cc: David Christensen <drc@linux.vnet.ibm.com>
>>
>> Signed-off-by: Ciara Power <ciara.power@intel.com>
>> ---
>>   lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>>   lib/librte_eal/common/eal_common_options.c | 3 +++
>>   lib/librte_eal/include/generic/rte_vect.h  | 2 ++
>>   lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>>   lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>>   5 files changed, 11 insertions(+)
>>
>> diff --git a/lib/librte_eal/arm/include/rte_vect.h 
>> b/lib/librte_eal/arm/include/rte_vect.h
>> index 01c51712a1..7487a53862 100644
>> --- a/lib/librte_eal/arm/include/rte_vect.h
>> +++ b/lib/librte_eal/arm/include/rte_vect.h
>> @@ -14,6 +14,8 @@
>>   extern "C" {
>>   #endif
>>
>> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
>> +
>>   typedef int32x4_t xmm_t;
>>
>>   #define    XMM_SIZE    (sizeof(xmm_t))
>> diff --git a/lib/librte_eal/common/eal_common_options.c 
>> b/lib/librte_eal/common/eal_common_options.c
>> index 90f4e8f5c3..c2a9624f89 100644
>> --- a/lib/librte_eal/common/eal_common_options.c
>> +++ b/lib/librte_eal/common/eal_common_options.c
>> @@ -35,6 +35,7 @@
>>   #ifndef RTE_EXEC_ENV_WINDOWS
>>   #include <rte_telemetry.h>
>>   #endif
>> +#include <rte_vect.h>
>>
>>   #include "eal_internal_cfg.h"
>>   #include "eal_options.h"
>> @@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config 
>> *internal_cfg)
>>       internal_cfg->user_mbuf_pool_ops_name = NULL;
>>       CPU_ZERO(&internal_cfg->ctrl_cpuset);
>>       internal_cfg->init_complete = 0;
>> +    internal_cfg->max_simd_bitwidth.bitwidth = 
>> RTE_DEFAULT_SIMD_BITWIDTH;
>> +    internal_cfg->max_simd_bitwidth.locked = 0;
>>   }
> 
> Build error on HEAD:
> 
> ../lib/librte_eal/common/eal_common_options.c: In function 
> ‘eal_reset_internal_config’:
> ../lib/librte_eal/common/eal_common_options.c:347:14: error: ‘struct 
> internal_config’ has no member named ‘max_simd_bitwidth’
>    internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
>                ^~
> ../lib/librte_eal/common/eal_common_options.c:348:14: error: ‘struct 
> internal_config’ has no member named ‘max_simd_bitwidth’
>    internal_cfg->max_simd_bitwidth.locked = 0;
>                ^~

Sorry, jumped the gun when testing the patch; I missed the preceding patch.

Dave

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 04/12] net/axgbe: add checks for max SIMD bitwidth
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 04/12] net/axgbe: " Ciara Power
@ 2020-08-07 17:49   ` Somalapuram, Amaranath
  0 siblings, 0 replies; 276+ messages in thread
From: Somalapuram, Amaranath @ 2020-08-07 17:49 UTC (permalink / raw)
  To: Ciara Power, dev, Sebastian, Selwin; +Cc: bruce.richardson, Ciara Power

[AMD Official Use Only - Internal Distribution Only]

++selwin
Please check.

Get Outlook for Android<https://aka.ms/ghei36>
________________________________
From: Ciara Power <ciara.power@intel.com>
Sent: Friday, August 7, 2020 9:28:51 PM
To: dev@dpdk.org <dev@dpdk.org>
Cc: bruce.richardson@intel.com <bruce.richardson@intel.com>; Ciara Power <ciara.power@intel.com>; Somalapuram, Amaranath <Amaranath.Somalapuram@amd.com>
Subject: [PATCH 20.11 04/12] net/axgbe: add checks for max SIMD bitwidth

[CAUTION: External Email]

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index 30c467db71..6200954caa 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -553,7 +553,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        if (!pdata->tx_queues)
                pdata->tx_queues = dev->data->tx_queues;

-       if (txq->vector_disable)
+       if (txq->vector_disable || rte_get_max_simd_bitwidth()
+                       < RTE_MAX_128_SIMD)
                dev->tx_pkt_burst = &axgbe_xmit_pkts;
        else
 #ifdef RTE_ARCH_X86
--
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 06/12] net/enic: add checks for max SIMD bitwidth
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 06/12] net/enic: " Ciara Power
@ 2020-08-10  4:50   ` Hyong Youb Kim (hyonkim)
  0 siblings, 0 replies; 276+ messages in thread
From: Hyong Youb Kim (hyonkim) @ 2020-08-10  4:50 UTC (permalink / raw)
  To: Ciara Power, dev; +Cc: bruce.richardson, John Daley (johndale)

> -----Original Message-----
> From: Ciara Power <ciara.power@intel.com>
> Sent: Saturday, August 8, 2020 12:59 AM
> To: dev@dpdk.org
> Cc: bruce.richardson@intel.com; Ciara Power <ciara.power@intel.com>;
> John Daley (johndale) <johndale@cisco.com>; Hyong Youb Kim (hyonkim)
> <hyonkim@cisco.com>
> Subject: [PATCH 20.11 06/12] net/enic: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: John Daley <johndale@cisco.com>
> Cc: Hyong Youb Kim <hyonkim@cisco.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/enic/enic_rxtx_vec_avx2.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c
> b/drivers/net/enic/enic_rxtx_vec_avx2.c
> index 676b9f5fdb..5db43bdbb8 100644
> --- a/drivers/net/enic/enic_rxtx_vec_avx2.c
> +++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
> @@ -821,7 +821,8 @@ enic_use_vector_rx_handler(struct rte_eth_dev
> *eth_dev)
>  	fconf = &eth_dev->data->dev_conf.fdir_conf;
>  	if (fconf->mode != RTE_FDIR_MODE_NONE)
>  		return false;
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +			rte_get_max_simd_bitwidth() >=
> RTE_MAX_256_SIMD) {
>  		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx
> handler");
>  		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
>  		enic->use_noscatter_vec_rx_handler = 1;
> --
> 2.17.1


Acked-by: Hyong Youb Kim <hyonkim@cisco.com>

Thanks..
-Hyong


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values Ciara Power
  2020-08-07 16:31   ` David Christensen
@ 2020-08-10  5:22   ` Ruifeng Wang
  1 sibling, 0 replies; 276+ messages in thread
From: Ruifeng Wang @ 2020-08-10  5:22 UTC (permalink / raw)
  To: Ciara Power, dev
  Cc: bruce.richardson, jerinj, Honnappa Nagarahalli, David Christensen, nd


> -----Original Message-----
> From: Ciara Power <ciara.power@intel.com>
> Sent: Friday, August 7, 2020 11:59 PM
> To: dev@dpdk.org
> Cc: bruce.richardson@intel.com; Ciara Power <ciara.power@intel.com>;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; David Christensen
> <drc@linux.vnet.ibm.com>
> Subject: [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
> 
> Each arch has a define for the default SIMD bitwidth value, this is used on EAL
> init to set the config max SIMD bitwidth.
> 
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>  lib/librte_eal/common/eal_common_options.c | 3 +++
> lib/librte_eal/include/generic/rte_vect.h  | 2 ++
>  lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>  lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>  5 files changed, 11 insertions(+)
> 
> diff --git a/lib/librte_eal/arm/include/rte_vect.h
> b/lib/librte_eal/arm/include/rte_vect.h
> index 01c51712a1..7487a53862 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -14,6 +14,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256

I think for arm platform we should set it to '128'. It is the bit width of NEON registers.

> +
>  typedef int32x4_t xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index 90f4e8f5c3..c2a9624f89 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -35,6 +35,7 @@
>  #ifndef RTE_EXEC_ENV_WINDOWS
>  #include <rte_telemetry.h>
>  #endif
> +#include <rte_vect.h>
> 
>  #include "eal_internal_cfg.h"
>  #include "eal_options.h"
> @@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config
> *internal_cfg)
>  	internal_cfg->user_mbuf_pool_ops_name = NULL;
>  	CPU_ZERO(&internal_cfg->ctrl_cpuset);
>  	internal_cfg->init_complete = 0;
> +	internal_cfg->max_simd_bitwidth.bitwidth =
> RTE_DEFAULT_SIMD_BITWIDTH;
> +	internal_cfg->max_simd_bitwidth.locked = 0;
>  }
> 
>  static int
> diff --git a/lib/librte_eal/include/generic/rte_vect.h
> b/lib/librte_eal/include/generic/rte_vect.h
> index 3fc47979f8..e98f184a97 100644
> --- a/lib/librte_eal/include/generic/rte_vect.h
> +++ b/lib/librte_eal/include/generic/rte_vect.h
> @@ -14,6 +14,8 @@
> 
>  #include <stdint.h>
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  /* Unsigned vector types */
> 
>  /**
> diff --git a/lib/librte_eal/ppc/include/rte_vect.h
> b/lib/librte_eal/ppc/include/rte_vect.h
> index b0545c878c..70fbd0c423 100644
> --- a/lib/librte_eal/ppc/include/rte_vect.h
> +++ b/lib/librte_eal/ppc/include/rte_vect.h
> @@ -15,6 +15,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef vector signed int xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/x86/include/rte_vect.h
> b/lib/librte_eal/x86/include/rte_vect.h
> index df5a607623..b1df75aca7 100644
> --- a/lib/librte_eal/x86/include/rte_vect.h
> +++ b/lib/librte_eal/x86/include/rte_vect.h
> @@ -35,6 +35,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef __m128i xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
  2020-08-07 16:19 ` [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Stephen Hemminger
@ 2020-08-10  9:52   ` Power, Ciara
  0 siblings, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-08-10  9:52 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Richardson, Bruce, Van Haaren, Harry, Stokes, Ian

Hi Stephen,

To give an overview of the rationale behind the patchset:
-  It allows other apps such as OVS and VPP which already make use of
   AVX-512 to indicate that they are happy for DPDK to use AVX-512 too.
-  It allows the end-user to override those settings if so desired.
-  It allows an easy way for the user to test with different vector paths by
   limiting bitwidths.

I can add some documentation for this in a v2, thanks for the suggestion.

- Ciara


>-----Original Message-----
>From: Stephen Hemminger <stephen@networkplumber.org>
>Sent: Friday 7 August 2020 17:19
>To: Power, Ciara <ciara.power@intel.com>
>Cc: dev@dpdk.org; Richardson, Bruce <bruce.richardson@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
>
>On Fri,  7 Aug 2020 16:58:47 +0100
>Ciara Power <ciara.power@intel.com> wrote:
>
>> A number of components in DPDK have optional AVX-512 or other vector
>> code paths which can be selected at runtime. Rather than having each
>> component provide its own mechanism to select a code path, this
>> patchset adds support for a single setting to control what code paths are
>used.
>> This can be used to enable some non-default code paths e.g. ones using
>> AVX-512, but also to limit the code paths to certain vector widths, or
>> to scalar code only, which is useful for testing.
>>
>> The max SIMD bitwidth setting can be set by the app itself through use
>> of the available API, or can be overridden by a commandline argument
>> passed by the user.
>>
>> Ciara Power (12):
>>   eal: add max SIMD bitwidth
>>   eal: add default SIMD bitwidth values
>>   net/i40e: add checks for max SIMD bitwidth
>>   net/axgbe: add checks for max SIMD bitwidth
>>   net/bnxt: add checks for max SIMD bitwidth
>>   net/enic: add checks for max SIMD bitwidth
>>   net/fm10k: add checks for max SIMD bitwidth
>>   net/iavf: add checks for max SIMD bitwidth
>>   net/ice: add checks for max SIMD bitwidth
>>   net/ixgbe: add checks for max SIMD bitwidth
>>   net/mlx5: add checks for max SIMD bitwidth
>>   net/virtio: add checks for max SIMD bitwidth
>>
>>  drivers/net/axgbe/axgbe_rxtx.c             |  3 +-
>>  drivers/net/bnxt/bnxt_ethdev.c             |  6 ++-
>>  drivers/net/enic/enic_rxtx_vec_avx2.c      |  3 +-
>>  drivers/net/fm10k/fm10k_ethdev.c           | 11 ++--
>>  drivers/net/i40e/i40e_rxtx.c               | 19 ++++---
>>  drivers/net/iavf/iavf_rxtx.c               | 16 +++---
>>  drivers/net/ice/ice_rxtx.c                 | 20 ++++---
>>  drivers/net/ixgbe/ixgbe_rxtx.c             |  7 ++-
>>  drivers/net/mlx5/mlx5_ethdev.c             |  3 +-
>>  drivers/net/virtio/virtio_ethdev.c         | 12 +++--
>>  lib/librte_eal/arm/include/rte_vect.h      |  2 +
>>  lib/librte_eal/common/eal_common_options.c | 63
>++++++++++++++++++++++
>>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>>  lib/librte_eal/common/eal_options.h        |  2 +
>>  lib/librte_eal/include/generic/rte_vect.h  |  2 +
>>  lib/librte_eal/include/rte_eal.h           | 31 +++++++++++
>>  lib/librte_eal/ppc/include/rte_vect.h      |  2 +
>>  lib/librte_eal/rte_eal_version.map         |  4 ++
>>  lib/librte_eal/x86/include/rte_vect.h      |  2 +
>>  19 files changed, 184 insertions(+), 32 deletions(-)
>>
>
>This looks useful, could you add some documentation on rationale and how
>you expect application to set it.

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 11/12] net/mlx5: add checks for max SIMD bitwidth
  2020-08-07 15:58 ` [dpdk-dev] [PATCH 20.11 11/12] net/mlx5: " Ciara Power
@ 2020-08-10 17:26   ` Alexander Kozyrev
  0 siblings, 0 replies; 276+ messages in thread
From: Alexander Kozyrev @ 2020-08-10 17:26 UTC (permalink / raw)
  To: Ciara Power, dev
  Cc: bruce.richardson, Matan Azrad, Shahaf Shuler, Viacheslav Ovsiienko

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power
> Sent: Friday, August 7, 2020 11:59
> To: dev@dpdk.org
> Cc: bruce.richardson@intel.com; Ciara Power <ciara.power@intel.com>; Matan
> Azrad <matan@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>;
> Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Subject: [dpdk-dev] [PATCH 20.11 11/12] net/mlx5: add checks for max SIMD
> bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Matan Azrad <matan@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>
> Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/mlx5/mlx5_ethdev.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index cefb45064e..f322f82029 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -479,7 +479,8 @@ mlx5_select_rx_function(struct rte_eth_dev *dev)
>  	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;
> 
>  	MLX5_ASSERT(dev != NULL);
> -	if (mlx5_check_vec_rx_support(dev) > 0) {
> +	if (mlx5_check_vec_rx_support(dev) > 0 &&
> +			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> {
>  		rx_pkt_burst = mlx5_rx_burst_vec;
>  		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
>  			dev->data->port_id);
> --
> 2.17.1

Hi Ciara, what do you think about moving this condition inside the mlx5_check_vec_rx_support() function?

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (12 preceding siblings ...)
  2020-08-07 16:19 ` [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Stephen Hemminger
@ 2020-08-11  5:36 ` Honnappa Nagarahalli
  2020-08-12 11:39   ` Power, Ciara
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                   ` (7 subsequent siblings)
  21 siblings, 1 reply; 276+ messages in thread
From: Honnappa Nagarahalli @ 2020-08-11  5:36 UTC (permalink / raw)
  To: Ciara Power, dev; +Cc: bruce.richardson, nd, Honnappa Nagarahalli, nd

Hi Ciara,
	I have not reviewed the other patches in this series yet. A few questions inline.

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power
> Sent: Friday, August 7, 2020 10:59 AM
> To: dev@dpdk.org
> Cc: bruce.richardson@intel.com; Ciara Power <ciara.power@intel.com>
> Subject: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
> 
> A number of components in DPDK have optional AVX-512 or other vector code
> paths which can be selected at runtime. Rather than having each component
> provide its own mechanism to select a code path, this patchset adds support
> for a single setting to control what code paths are used.
Do you mean that all the components will have to use AVX-512?
IMO, different libraries might respond differently to the use of different vector sizes. Are we taking away the ability to use different vector sizes for different components?

> This can be used to enable some non-default code paths e.g. ones using AVX-
> 512, but also to limit the code paths to certain vector widths, or to scalar
> code only, which is useful for testing.
> 
> The max SIMD bitwidth setting can be set by the app itself through use of the
> available API, or can be overridden by a commandline argument passed by the
> user.
Arm platforms support the SVE (Scalable Vector Extension) feature. With this feature, the code is agnostic to the vector size, i.e. the same code can run on various vector sizes. There is no code in DPDK using this feature yet, but it will be added in the near future. It would be good to handle this now so that we do not have issues in the future.

> 
> Ciara Power (12):
>   eal: add max SIMD bitwidth
>   eal: add default SIMD bitwidth values
>   net/i40e: add checks for max SIMD bitwidth
>   net/axgbe: add checks for max SIMD bitwidth
>   net/bnxt: add checks for max SIMD bitwidth
>   net/enic: add checks for max SIMD bitwidth
>   net/fm10k: add checks for max SIMD bitwidth
>   net/iavf: add checks for max SIMD bitwidth
>   net/ice: add checks for max SIMD bitwidth
>   net/ixgbe: add checks for max SIMD bitwidth
>   net/mlx5: add checks for max SIMD bitwidth
>   net/virtio: add checks for max SIMD bitwidth
> 
>  drivers/net/axgbe/axgbe_rxtx.c             |  3 +-
>  drivers/net/bnxt/bnxt_ethdev.c             |  6 ++-
>  drivers/net/enic/enic_rxtx_vec_avx2.c      |  3 +-
>  drivers/net/fm10k/fm10k_ethdev.c           | 11 ++--
>  drivers/net/i40e/i40e_rxtx.c               | 19 ++++---
>  drivers/net/iavf/iavf_rxtx.c               | 16 +++---
>  drivers/net/ice/ice_rxtx.c                 | 20 ++++---
>  drivers/net/ixgbe/ixgbe_rxtx.c             |  7 ++-
>  drivers/net/mlx5/mlx5_ethdev.c             |  3 +-
>  drivers/net/virtio/virtio_ethdev.c         | 12 +++--
>  lib/librte_eal/arm/include/rte_vect.h      |  2 +
>  lib/librte_eal/common/eal_common_options.c | 63
> ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/generic/rte_vect.h  |  2 +
>  lib/librte_eal/include/rte_eal.h           | 31 +++++++++++
>  lib/librte_eal/ppc/include/rte_vect.h      |  2 +
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  lib/librte_eal/x86/include/rte_vect.h      |  2 +
>  19 files changed, 184 insertions(+), 32 deletions(-)
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
  2020-08-07 16:31   ` David Christensen
  2020-08-07 16:59     ` David Christensen
@ 2020-08-12 11:28     ` Power, Ciara
  1 sibling, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-08-12 11:28 UTC (permalink / raw)
  To: David Christensen, dev
  Cc: Richardson, Bruce, Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli

Hi David,


>-----Original Message-----
>From: David Christensen <drc@linux.vnet.ibm.com>
>Sent: Friday 7 August 2020 17:32
>To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
>Cc: Richardson, Bruce <bruce.richardson@intel.com>; Ruifeng Wang
><ruifeng.wang@arm.com>; Jerin Jacob <jerinj@marvell.com>; Honnappa
>Nagarahalli <honnappa.nagarahalli@arm.com>
>Subject: Re: [PATCH 20.11 02/12] eal: add default SIMD bitwidth values
>
>On 8/7/20 8:58 AM, Ciara Power wrote:
>> Each arch has a define for the default SIMD bitwidth value, this is
>> used on EAL init to set the config max SIMD bitwidth.
>
>What's the intended use case?


- For x86, it allows other apps such as OVS and VPP which already make use of
  AVX-512 to indicate that they are happy for DPDK to use AVX-512 too.
- It allows the end-user to override those settings if so desired.
- For all architectures, it allows an easy way to disable vector code or limit vector
  length if so desired, which can be useful for testing.


<snip>


Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
  2020-08-11  5:36 ` Honnappa Nagarahalli
@ 2020-08-12 11:39   ` Power, Ciara
  0 siblings, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-08-12 11:39 UTC (permalink / raw)
  To: Honnappa Nagarahalli, dev; +Cc: Richardson, Bruce, nd, nd

Hi Honnappa,

 
>-----Original Message-----
>From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
>Sent: Tuesday 11 August 2020 06:37
>To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
>Cc: Richardson, Bruce <bruce.richardson@intel.com>; nd <nd@arm.com>;
>Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd
><nd@arm.com>
>Subject: RE: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
>
>Hi Ciara,
>	I have not reviewed other patches in this series yet. Few questions
>inline.
>
>> -----Original Message-----
>> From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power
>> Sent: Friday, August 7, 2020 10:59 AM
>> To: dev@dpdk.org
>> Cc: bruce.richardson@intel.com; Ciara Power <ciara.power@intel.com>
>> Subject: [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL
>>
>> A number of components in DPDK have optional AVX-512 or other vector
>> code paths which can be selected at runtime. Rather than having each
>> component provide its own mechanism to select a code path, this
>> patchset adds support for a single setting to control what code paths are
>used.
>Do you mean that all the components will have to use AVX-512?
>IMO, different libraries might behave differently to the use of different vector
>sizes. Are we taking away the ability to use different vector sizes for different
>components.
>

No, this setting is a max bitwidth which can be set by apps using the EAL API
based on what is best for the usage in their apps, or by the user with an EAL flag,
but each library is still free to choose its own best path, subject to it not being
wider than the specified max.
For example, if the max bitwidth is set to 512, a library can still choose to use a 256-bit
path over a 512-bit one if it's advantageous.


>> This can be used to enable some non-default code paths e.g. ones using
>> AVX- 512, but also to limit the code paths to certain vector widths,
>> or to scalar code only, which is useful for testing.
>>
>> The max SIMD bitwidth setting can be set by the app itself through use
>> of the available API, or can be overriden by a commandline argument
>> passed by the user.
>Arm platforms support SVE (scalable vector extensions) feature. With this
>feature, the code is agnostic to the vector size. i.e. same code can run on
>various vector sizes. There is no code yet in DPDK with this feature. But, it will
>be added in the near future. It would be good to handle this now so that we
>do not have issues in the future..

Do you have any suggestions how this could be handled?


<snip>

Thanks,
Ciara


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 00/17] add max SIMD bitwidth to EAL
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (13 preceding siblings ...)
  2020-08-11  5:36 ` Honnappa Nagarahalli
@ 2020-08-27 16:12 ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth Ciara Power
                     ` (16 more replies)
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                   ` (6 subsequent siblings)
  21 siblings, 17 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power

v2:
  - Added some documentation.
  - Modified default max bitwidth for Arm.
  - Moved mlx5 condition check into existing check vec support function.
  - Added max SIMD bitwidth checks to some libraries.

A number of components in DPDK have optional AVX-512 or other vector
code paths which can be selected at runtime. Rather than having each
component provide its own mechanism to select a code path, this patchset
adds support for a single setting to control what code paths are used.
This can be used to enable some non-default code paths e.g. ones using
AVX-512, but also to limit the code paths to certain vector widths, or
to scalar code only, which is useful for testing.

The max SIMD bitwidth setting can be set by the app itself through use of
the available API, or can be overridden by a commandline argument passed by
the user.

Ciara Power (17):
  eal: add max SIMD bitwidth
  eal: add default SIMD bitwidth values
  doc: add detail on using max SIMD bitwidth
  net/i40e: add checks for max SIMD bitwidth
  net/axgbe: add checks for max SIMD bitwidth
  net/bnxt: add checks for max SIMD bitwidth
  net/enic: add checks for max SIMD bitwidth
  net/fm10k: add checks for max SIMD bitwidth
  net/iavf: add checks for max SIMD bitwidth
  net/ice: add checks for max SIMD bitwidth
  net/ixgbe: add checks for max SIMD bitwidth
  net/mlx5: add checks for max SIMD bitwidth
  net/virtio: add checks for max SIMD bitwidth
  distributor: add checks for max SIMD bitwidth
  member: add checks for max SIMD bitwidth
  efd: add checks for max SIMD bitwidth
  net: add checks for max SIMD bitwidth

 doc/guides/howto/avx512.rst                   | 36 +++++++++++
 doc/guides/linux_gsg/eal_args.include.rst     | 12 ++++
 .../prog_guide/env_abstraction_layer.rst      | 31 +++++++++
 drivers/net/axgbe/axgbe_rxtx.c                |  3 +-
 drivers/net/bnxt/bnxt_ethdev.c                |  6 +-
 drivers/net/enic/enic_rxtx_vec_avx2.c         |  3 +-
 drivers/net/fm10k/fm10k_ethdev.c              | 11 +++-
 drivers/net/i40e/i40e_rxtx.c                  | 19 ++++--
 drivers/net/iavf/iavf_rxtx.c                  | 16 +++--
 drivers/net/ice/ice_rxtx.c                    | 20 ++++--
 drivers/net/ixgbe/ixgbe_rxtx.c                |  7 ++-
 drivers/net/mlx5/mlx5_rxtx_vec.c              |  2 +
 drivers/net/virtio/virtio_ethdev.c            | 12 ++--
 lib/librte_distributor/rte_distributor.c      |  3 +-
 lib/librte_eal/arm/include/rte_vect.h         |  2 +
 lib/librte_eal/common/eal_common_options.c    | 63 +++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h      |  8 +++
 lib/librte_eal/common/eal_options.h           |  2 +
 lib/librte_eal/include/generic/rte_vect.h     |  2 +
 lib/librte_eal/include/rte_eal.h              | 32 ++++++++++
 lib/librte_eal/ppc/include/rte_vect.h         |  2 +
 lib/librte_eal/rte_eal_version.map            |  4 ++
 lib/librte_eal/x86/include/rte_vect.h         |  2 +
 lib/librte_efd/rte_efd.c                      |  7 ++-
 lib/librte_member/rte_member_ht.c             |  3 +-
 lib/librte_net/rte_net_crc.c                  |  8 +++
 26 files changed, 281 insertions(+), 35 deletions(-)
 create mode 100644 doc/guides/howto/avx512.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
@ 2020-08-27 16:12   ` Ciara Power
  2020-09-04  5:30     ` Honnappa Nagarahalli
  2020-09-06 22:01     ` Ananyev, Konstantin
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 02/17] eal: add default SIMD bitwidth values Ciara Power
                     ` (15 subsequent siblings)
  16 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Ray Kinsella, Neil Horman

This patch adds a max SIMD bitwidth EAL configuration. The API allows
for an app to set this value. It can also be set using EAL argument
--force-max-simd-bitwidth, which will lock the value and override any
modifications made by the app.

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v2: Added to Doxygen comment for API.
---
 lib/librte_eal/common/eal_common_options.c | 60 ++++++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/include/rte_eal.h           | 32 ++++++++++++
 lib/librte_eal/rte_eal_version.map         |  4 ++
 5 files changed, 106 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index a5426e1234..90f4e8f5c3 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -102,6 +102,7 @@ eal_long_options[] = {
 	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
+	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
 	{0,                     0, NULL, 0                        }
 };
 
@@ -1309,6 +1310,32 @@ eal_parse_iova_mode(const char *name)
 	return 0;
 }
 
+static int
+eal_parse_simd_bitwidth(const char *arg, bool locked)
+{
+	char *end;
+	uint16_t bitwidth;
+	int ret;
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	bitwidth = strtoul(arg, &end, 0);
+
+	/* check for errors */
+	if ((errno != 0) || end == NULL || (*end != '\0'))
+		return -1;
+
+	ret = rte_set_max_simd_bitwidth(bitwidth);
+	if (ret < 0)
+		return -1;
+	internal_conf->max_simd_bitwidth.locked = locked;
+	return 0;
+}
+
 static int
 eal_parse_base_virtaddr(const char *arg)
 {
@@ -1707,6 +1734,13 @@ eal_parse_common_option(int opt, const char *optarg,
 	case OPT_NO_TELEMETRY_NUM:
 		conf->no_telemetry = 1;
 		break;
+	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
+		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
+			return -1;
+		}
+		break;
 
 	/* don't know what to do, leave this to caller */
 	default:
@@ -1903,6 +1937,31 @@ eal_check_common_options(struct internal_config *internal_cfg)
 	return 0;
 }
 
+uint16_t
+rte_get_max_simd_bitwidth(void)
+{
+	const struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	return internal_conf->max_simd_bitwidth.bitwidth;
+}
+
+int
+rte_set_max_simd_bitwidth(uint16_t bitwidth)
+{
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	if (internal_conf->max_simd_bitwidth.locked) {
+		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
+		return -EPERM;
+	}
+	if (bitwidth < RTE_NO_SIMD || !rte_is_power_of_2(bitwidth)) {
+		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
+		return -EINVAL;
+	}
+	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
+	return 0;
+}
+
 void
 eal_common_usage(void)
 {
@@ -1981,6 +2040,7 @@ eal_common_usage(void)
 	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
 	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
 	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
+	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
 	       "\nEAL options for DEBUG use only:\n"
 	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
 	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 13f93388a7..367e0cc19e 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -33,6 +33,12 @@ struct hugepage_info {
 	int lock_descriptor;    /**< file descriptor for hugepage dir */
 };
 
+struct simd_bitwidth {
+	/**< flag indicating if bitwidth is locked from further modification */
+	bool locked;
+	uint16_t bitwidth; /**< bitwidth value */
+};
+
 /**
  * internal configuration
  */
@@ -85,6 +91,8 @@ struct internal_config {
 	volatile unsigned int init_complete;
 	/**< indicates whether EAL has completed initialization */
 	unsigned int no_telemetry; /**< true to disable Telemetry */
+	/** max simd bitwidth path to use */
+	struct simd_bitwidth max_simd_bitwidth;
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 89769d48b4..ef33979664 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -85,6 +85,8 @@ enum {
 	OPT_TELEMETRY_NUM,
 #define OPT_NO_TELEMETRY      "no-telemetry"
 	OPT_NO_TELEMETRY_NUM,
+#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
+	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index ddcf6a2e7a..8148f650f2 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -43,6 +43,13 @@ enum rte_proc_type_t {
 	RTE_PROC_INVALID
 };
 
+enum rte_max_simd_t {
+	RTE_NO_SIMD = 64,
+	RTE_MAX_128_SIMD = 128,
+	RTE_MAX_256_SIMD = 256,
+	RTE_MAX_512_SIMD = 512
+};
+
 /**
  * Get the process type in a multi-process setup
  *
@@ -51,6 +58,31 @@ enum rte_proc_type_t {
  */
 enum rte_proc_type_t rte_eal_process_type(void);
 
+/**
+ * Get the supported SIMD bitwidth.
+ *
+ * @return
+ *   uint16_t bitwidth.
+ */
+__rte_experimental
+uint16_t rte_get_max_simd_bitwidth(void);
+
+/**
+ * Set the supported SIMD bitwidth.
+ * This API should only be called once at initialization, before EAL init.
+ *
+ * @param bitwidth
+ *   uint16_t bitwidth.
+ * @return
+ *   0 on success.
+ * @return
+ *   -EINVAL on invalid bitwidth parameter.
+ * @return
+ *   -EPERM if bitwidth is locked.
+ */
+__rte_experimental
+int rte_set_max_simd_bitwidth(uint16_t bitwidth);
+
 /**
  * Request iopl privilege for all RPL.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index bf0c17c233..8059ea76b6 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -403,6 +403,10 @@ EXPERIMENTAL {
 	rte_mp_disable;
 	rte_thread_register;
 	rte_thread_unregister;
+
+	# added in 20.11
+	rte_get_max_simd_bitwidth;
+	rte_set_max_simd_bitwidth;
 };
 
 INTERNAL {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 02/17] eal: add default SIMD bitwidth values
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth Ciara Power
@ 2020-08-27 16:12   ` Ciara Power
  2020-09-04  5:30     ` Honnappa Nagarahalli
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth Ciara Power
                     ` (14 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev
  Cc: Ciara Power, Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli,
	David Christensen, Jan Viktorin, Bruce Richardson,
	Konstantin Ananyev

Each arch has a define for the default SIMD bitwidth value; this is used
on EAL init to set the config max SIMD bitwidth.

Cc: Ruifeng Wang <ruifeng.wang@arm.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Cc: David Christensen <drc@linux.vnet.ibm.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v2: Changed default bitwidth for Arm to 128.
---
 lib/librte_eal/arm/include/rte_vect.h      | 2 ++
 lib/librte_eal/common/eal_common_options.c | 3 +++
 lib/librte_eal/include/generic/rte_vect.h  | 2 ++
 lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
 lib/librte_eal/x86/include/rte_vect.h      | 2 ++
 5 files changed, 11 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index 01c51712a1..2cd61d6279 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -14,6 +14,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 128
+
 typedef int32x4_t xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 90f4e8f5c3..c2a9624f89 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -35,6 +35,7 @@
 #ifndef RTE_EXEC_ENV_WINDOWS
 #include <rte_telemetry.h>
 #endif
+#include <rte_vect.h>
 
 #include "eal_internal_cfg.h"
 #include "eal_options.h"
@@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
 	CPU_ZERO(&internal_cfg->ctrl_cpuset);
 	internal_cfg->init_complete = 0;
+	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
+	internal_cfg->max_simd_bitwidth.locked = 0;
 }
 
 static int
diff --git a/lib/librte_eal/include/generic/rte_vect.h b/lib/librte_eal/include/generic/rte_vect.h
index 3fc47979f8..e98f184a97 100644
--- a/lib/librte_eal/include/generic/rte_vect.h
+++ b/lib/librte_eal/include/generic/rte_vect.h
@@ -14,6 +14,8 @@
 
 #include <stdint.h>
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 /* Unsigned vector types */
 
 /**
diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
index b0545c878c..70fbd0c423 100644
--- a/lib/librte_eal/ppc/include/rte_vect.h
+++ b/lib/librte_eal/ppc/include/rte_vect.h
@@ -15,6 +15,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef vector signed int xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
index df5a607623..b1df75aca7 100644
--- a/lib/librte_eal/x86/include/rte_vect.h
+++ b/lib/librte_eal/x86/include/rte_vect.h
@@ -35,6 +35,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef __m128i xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 02/17] eal: add default SIMD bitwidth values Ciara Power
@ 2020-08-27 16:12   ` Ciara Power
  2020-09-06 22:20     ` Ananyev, Konstantin
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 04/17] net/i40e: add checks for " Ciara Power
                     ` (13 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Anatoly Burakov, John McNamara, Marko Kovacevic

This patch adds documentation on the usage of the max SIMD bitwidth EAL
setting, and how to use it to enable AVX-512 at runtime.

Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: John McNamara <john.mcnamara@intel.com>
Cc: Marko Kovacevic <marko.kovacevic@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
 doc/guides/linux_gsg/eal_args.include.rst     | 12 +++++++
 .../prog_guide/env_abstraction_layer.rst      | 31 ++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 doc/guides/howto/avx512.rst

diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
new file mode 100644
index 0000000000..ebae0f2b4f
--- /dev/null
+++ b/doc/guides/howto/avx512.rst
@@ -0,0 +1,36 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2020 Intel Corporation.
+
+
+Using AVX-512 with DPDK
+=======================
+
+AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of EAL API,
+and by the user with a commandline argument. DPDK has a setting for max SIMD bitwidth,
+which can be modified and will then limit the vector path taken by the code.
+
+
+Using the API in apps
+---------------------
+
+Apps can request that DPDK use AVX-512 at runtime, if it provides improved application performance.
+This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
+which does not allow for AVX-512.
+
+.. code-block:: c
+
+   rte_set_max_simd_bitwidth(RTE_MAX_512_SIMD);
+
+This API should only be called once at initialization, before EAL init.
+For more information on the possible enum values to use as a parameter, go to :ref:`max_simd_bitwidth`.
+
+
+Using the command-line argument
+---------------------------------------------
+
+The user can select to use AVX-512 at runtime, using the following argument to set the max bitwidth::
+
+   ./app/dpdk-testpmd --force-max-simd-bitwidth=512
+
+This will override any further changes to the max SIMD bitwidth in DPDK,
+which is useful for testing purposes.
diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 0fe4457968..bab3e14e47 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -210,3 +210,15 @@ Other options
 *    ``--no-telemetry``:
 
     Disable telemetry.
+
+*    ``--force-max-simd-bitwidth=<val>``:
+
+    Specify the maximum SIMD bitwidth size to handle. This limits which vector paths,
+    if any, are taken, as any paths taken must use a bitwidth at or below the max bitwidth limit.
+    For example, to allow all SIMD bitwidths up to and including AVX-512::
+
+        --force-max-simd-bitwidth=512
+
+    The following example shows limiting the bitwidth to 64-bits to disable all vector code::
+
+        --force-max-simd-bitwidth=64
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index f64ae953d1..74f26ed6c9 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -486,6 +486,37 @@ the desired addressing mode when virtual devices that are not directly attached
 To facilitate forcing the IOVA mode to a specific value the EAL command line option ``--iova-mode`` can
 be used to select either physical addressing('pa') or virtual addressing('va').
 
+.. _max_simd_bitwidth:
+
+
+Max SIMD bitwidth
+~~~~~~~~~~~~~~~~~
+
+The EAL provides a single setting to limit the max SIMD bitwidth used by DPDK,
+which is used in determining the vector path, if any, chosen by a component.
+The value can be set at runtime by an application using the 'rte_set_max_simd_bitwidth(uint16_t bitwidth)' function,
+which should only be called once at initialization, before EAL init.
+The value can be overridden by the user using the EAL command-line option '--force-max-simd-bitwidth'.
+
+When choosing a vector path, along with checking the CPU feature support,
+the value of the max SIMD bitwidth must also be checked, and can be retrieved using the 'rte_get_max_simd_bitwidth()' function.
+The value should be compared against the enum values for accepted max SIMD bitwidths:
+
+.. code-block:: c
+
+   enum rte_max_simd_t {
+       RTE_NO_SIMD = 64,
+       RTE_MAX_128_SIMD = 128,
+       RTE_MAX_256_SIMD = 256,
+       RTE_MAX_512_SIMD = 512
+   };
+
+    if (rte_get_max_simd_bitwidth() >= RTE_MAX_512_SIMD)
+        /* Take AVX-512 vector path */
+    else if (rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
+        /* Take AVX2 vector path */
+
+
 Memory Segments and Memory Zones (memzone)
 ------------------------------------------
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 04/17] net/i40e: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (2 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 05/17] net/axgbe: " Ciara Power
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Beilei Xing, Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Beilei Xing <beilei.xing@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index fe7f9200c1..90f4e26fb8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3098,7 +3098,8 @@ static eth_rx_burst_t
 i40e_get_latest_rx_vec(bool scatter)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 	}
 
-	if (ad->rx_vec_allowed) {
+	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
+			>= RTE_MAX_128_SIMD) {
 		/* Vec Rx path */
 		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
 				dev->data->port_id);
@@ -3268,7 +3271,8 @@ static eth_tx_burst_t
 i40e_get_latest_tx_vec(void)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 	}
 
 	if (ad->tx_simple_allowed) {
-		if (ad->tx_vec_allowed) {
+		if (ad->tx_vec_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
 			if (ad->use_latest_vec)
 				dev->tx_pkt_burst =
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 05/17] net/axgbe: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (3 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 04/17] net/i40e: add checks for " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 06/17] net/bnxt: " Ciara Power
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Somalapuram Amaranath

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index 30c467db71..6200954caa 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -553,7 +553,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!pdata->tx_queues)
 		pdata->tx_queues = dev->data->tx_queues;
 
-	if (txq->vector_disable)
+	if (txq->vector_disable || rte_get_max_simd_bitwidth()
+			< RTE_MAX_128_SIMD)
 		dev->tx_pkt_burst = &axgbe_xmit_pkts;
 	else
 #ifdef RTE_ARCH_X86
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 06/17] net/bnxt: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (4 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 05/17] net/axgbe: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 07/17] net/enic: " Ciara Power
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Ajit Khaparde, Somnath Kotur

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/bnxt/bnxt_ethdev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 510a0d9e0a..626aae8881 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1100,7 +1100,8 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
 		DEV_RX_OFFLOAD_RSS_HASH |
 		DEV_RX_OFFLOAD_VLAN_FILTER)) &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) && rte_get_max_simd_bitwidth()
+			>= RTE_MAX_128_SIMD) {
 		PMD_DRV_LOG(INFO, "Using vector mode receive for port %d\n",
 			    eth_dev->data->port_id);
 		bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
@@ -1132,7 +1133,8 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 	 */
 	if (!eth_dev->data->scattered_rx &&
 	    !eth_dev->data->dev_conf.txmode.offloads &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) &&
+	    rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		PMD_DRV_LOG(INFO, "Using vector mode transmit for port %d\n",
 			    eth_dev->data->port_id);
 		return bnxt_xmit_pkts_vec;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 07/17] net/enic: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (5 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 06/17] net/bnxt: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 08/17] net/fm10k: " Ciara Power
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, John Daley, Hyong Youb Kim

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: John Daley <johndale@cisco.com>
Cc: Hyong Youb Kim <hyonkim@cisco.com>

Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/enic/enic_rxtx_vec_avx2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
index 676b9f5fdb..5db43bdbb8 100644
--- a/drivers/net/enic/enic_rxtx_vec_avx2.c
+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
@@ -821,7 +821,8 @@ enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
 	fconf = &eth_dev->data->dev_conf.fdir_conf;
 	if (fconf->mode != RTE_FDIR_MODE_NONE)
 		return false;
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD) {
 		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
 		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
 		enic->use_noscatter_vec_rx_handler = 1;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 08/17] net/fm10k: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (6 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 07/17] net/enic: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-10-07  5:01     ` Wang, Xiao W
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 09/17] net/iavf: " Ciara Power
                     ` (8 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Qi Zhang, Xiao Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qi Zhang <qi.z.zhang@intel.com>
Cc: Xiao Wang <xiao.w.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index b574693bca..f7c41d4377 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2937,7 +2937,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* primary process has set the ftag flag and offloads */
 		txq = dev->data->tx_queues[0];
-		if (fm10k_tx_vec_condition_check(txq)) {
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth()
+				< RTE_MAX_128_SIMD) {
 			dev->tx_pkt_burst = fm10k_xmit_pkts;
 			dev->tx_pkt_prepare = fm10k_prep_pkts;
 			PMD_INIT_LOG(DEBUG, "Use regular Tx func");
@@ -2956,7 +2958,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 		txq = dev->data->tx_queues[i];
 		txq->tx_ftag_en = tx_ftag_en;
 		/* Check if Vector Tx is satisfied */
-		if (fm10k_tx_vec_condition_check(txq))
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD)
 			use_sse = 0;
 	}
 
@@ -2990,7 +2993,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met.
 	 */
 	if (!fm10k_rx_vec_condition_check(dev) &&
-			dev_info->rx_vec_allowed && !rx_ftag_en) {
+			dev_info->rx_vec_allowed && !rx_ftag_en &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 		if (dev->data->scattered_rx)
 			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
 		else
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 09/17] net/iavf: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (7 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 08/17] net/fm10k: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 10/17] net/ice: " Ciara Power
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Jingjing Wu, Beilei Xing

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Jingjing Wu <jingjing.wu@intel.com>
Cc: Beilei Xing <beilei.xing@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 05a7dd898a..b798d082a2 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2105,14 +2105,16 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_rx_vec_dev_check(dev)) {
+	if (!iavf_rx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
 			rxq = dev->data->rx_queues[i];
 			(void)iavf_rxq_vec_setup(rxq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 			use_avx2 = true;
 
 		if (dev->data->scattered_rx) {
@@ -2178,7 +2180,8 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_tx_vec_dev_check(dev)) {
+	if (!iavf_tx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
 			txq = dev->data->tx_queues[i];
 			if (!txq)
@@ -2186,8 +2189,9 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 			iavf_txq_vec_setup(txq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 			use_avx2 = true;
 
 		PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 10/17] net/ice: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (8 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 09/17] net/iavf: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 11/17] net/ixgbe: " Ciara Power
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Qiming Yang, Qi Zhang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qiming Yang <qiming.yang@intel.com>
Cc: Qi Zhang <qi.z.zhang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 2e1f06d2c0..eda2d9a8c7 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2889,7 +2889,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
+		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			ad->rx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
 				rxq = dev->data->rx_queues[i];
@@ -2899,8 +2901,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_MAX_256_SIMD)
 				use_avx2 = true;
 
 		} else {
@@ -3067,7 +3071,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_tx_vec_dev_check(dev)) {
+		if (!ice_tx_vec_dev_check(dev) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			ad->tx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_tx_queues; i++) {
 				txq = dev->data->tx_queues[i];
@@ -3077,8 +3083,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_MAX_256_SIMD)
 				use_avx2 = true;
 
 		} else {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 11/17] net/ixgbe: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (9 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 10/17] net/ice: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 12/17] net/mlx5: " Ciara Power
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Wei Zhao, Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Wei Zhao <wei.zhao1@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 977ecf5137..eadc7183f2 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 		dev->tx_pkt_prepare = NULL;
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
-					ixgbe_txq_vec_setup(txq) == 0)) {
+					ixgbe_txq_vec_setup(txq) == 0) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
 			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
 		} else
@@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met and Rx Bulk Allocation should be allowed.
 	 */
 	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
-	    !adapter->rx_bulk_alloc_allowed) {
+	    !adapter->rx_bulk_alloc_allowed ||
+			rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
 		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
 				    "preconditions",
 			     dev->data->port_id);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 12/17] net/mlx5: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (10 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 11/17] net/ixgbe: " Ciara Power
@ 2020-08-27 16:12   ` " Ciara Power
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 13/17] net/virtio: " Ciara Power
                     ` (4 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:12 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Matan Azrad, Shahaf Shuler, Viacheslav Ovsiienko

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Matan Azrad <matan@mellanox.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>
Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v2: Moved check for max bitwidth into existing check vec
    support function.
---
 drivers/net/mlx5/mlx5_rxtx_vec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 711dcd35fa..c384c737dc 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -148,6 +148,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	uint32_t i;
 
+	if (rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD)
+		return -ENOTSUP;
 	if (!priv->config.rx_vec_en)
 		return -ENOTSUP;
 	if (mlx5_mprq_enabled(dev))
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 13/17] net/virtio: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (11 preceding siblings ...)
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 12/17] net/mlx5: " Ciara Power
@ 2020-08-27 16:13   ` " Ciara Power
  2020-08-31  2:39     ` Xia, Chenbo
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 14/17] distributor: " Ciara Power
                     ` (3 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:13 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Maxime Coquelin, Chenbo Xia, Zhihong Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Chenbo Xia <chenbo.xia@intel.com>
Cc: Zhihong Wang <zhihong.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index dc0093bdf0..f779ce8396 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1517,9 +1517,11 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 	if (vtpci_packed_queue(hw)) {
 		PMD_INIT_LOG(INFO,
 			"virtio: using packed ring %s Tx path on port %u",
-			hw->use_vec_tx ? "vectorized" : "standard",
+			(hw->use_vec_tx && rte_get_max_simd_bitwidth()
+			> RTE_MAX_256_SIMD) ? "vectorized" : "standard",
 			eth_dev->data->port_id);
-		if (hw->use_vec_tx)
+		if (hw->use_vec_tx && rte_get_max_simd_bitwidth()
+				> RTE_MAX_256_SIMD)
 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
 		else
 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
@@ -1536,7 +1538,8 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 	}
 
 	if (vtpci_packed_queue(hw)) {
-		if (hw->use_vec_rx) {
+		if (hw->use_vec_rx && rte_get_max_simd_bitwidth()
+				> RTE_MAX_256_SIMD) {
 			PMD_INIT_LOG(INFO,
 				"virtio: using packed ring vectorized Rx path on port %u",
 				eth_dev->data->port_id);
@@ -1555,7 +1558,8 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
 		}
 	} else {
-		if (hw->use_vec_rx) {
+		if (hw->use_vec_rx && rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
 				eth_dev->data->port_id);
 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 14/17] distributor: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (12 preceding siblings ...)
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 13/17] net/virtio: " Ciara Power
@ 2020-08-27 16:13   ` " Ciara Power
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 15/17] member: " Ciara Power
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:13 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, David Hunt

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: David Hunt <david.hunt@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_distributor/rte_distributor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index 1c047f065a..9f0a9b1d48 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -636,7 +636,8 @@ rte_distributor_create(const char *name,
 
 	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
 #if defined(RTE_ARCH_X86)
-	d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
+	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
+		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
 #endif
 
 	/*
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 15/17] member: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (13 preceding siblings ...)
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 14/17] distributor: " Ciara Power
@ 2020-08-27 16:13   ` " Ciara Power
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 16/17] efd: " Ciara Power
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 17/17] net: " Ciara Power
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:13 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Yipeng Wang, Sameh Gobriel

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU
enabled path.

Cc: Yipeng Wang <yipeng1.wang@intel.com>
Cc: Sameh Gobriel <sameh.gobriel@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_member/rte_member_ht.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
index cbcd0d4407..71e3cf7b52 100644
--- a/lib/librte_member/rte_member_ht.c
+++ b/lib/librte_member/rte_member_ht.c
@@ -113,7 +113,8 @@ rte_member_create_ht(struct rte_member_setsum *ss,
 	}
 #if defined(RTE_ARCH_X86)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
-			RTE_MEMBER_BUCKET_ENTRIES == 16)
+			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
 	else
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 16/17] efd: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (14 preceding siblings ...)
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 15/17] member: " Ciara Power
@ 2020-08-27 16:13   ` " Ciara Power
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 17/17] net: " Ciara Power
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:13 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Byron Marohn, Yipeng Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Byron Marohn <byron.marohn@intel.com>
Cc: Yipeng Wang <yipeng1.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_efd/rte_efd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index 6a799556d4..509ecc8256 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -645,7 +645,9 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * For less than 4 bits, scalar function performs better
 	 * than vectorised version
 	 */
-	if (RTE_EFD_VALUE_NUM_BITS > 3 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (RTE_EFD_VALUE_NUM_BITS > 3
+			&& rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)
+			&& rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		table->lookup_fn = EFD_LOOKUP_AVX2;
 	else
 #endif
@@ -655,7 +657,8 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * than vectorised version
 	 */
 	if (RTE_EFD_VALUE_NUM_BITS > 16 &&
-	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
 		table->lookup_fn = EFD_LOOKUP_NEON;
 	else
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v2 17/17] net: add checks for max SIMD bitwidth
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
                     ` (15 preceding siblings ...)
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 16/17] efd: " Ciara Power
@ 2020-08-27 16:13   ` " Ciara Power
  2020-09-02 11:02     ` Singh, Jasvinder
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-08-27 16:13 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Jasvinder Singh, Olivier Matz

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path. This check is done just before the handler is called; it cannot
be done when setting the handlers initially, as the EAL max SIMD bitwidth
value has not yet been set.

Cc: Jasvinder Singh <jasvinder.singh@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_net/rte_net_crc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
index 9fd4794a9d..d3d3206919 100644
--- a/lib/librte_net/rte_net_crc.c
+++ b/lib/librte_net/rte_net_crc.c
@@ -9,6 +9,7 @@
 #include <rte_cpuflags.h>
 #include <rte_common.h>
 #include <rte_net_crc.h>
+#include <rte_eal.h>
 
 #if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
 #define X86_64_SSE42_PCLMULQDQ     1
@@ -60,6 +61,8 @@ static rte_net_crc_handler handlers_neon[] = {
 };
 #endif
 
+static uint16_t max_simd_bitwidth;
+
 /**
  * Reflect the bits about the middle
  *
@@ -175,6 +178,11 @@ rte_net_crc_calc(const void *data,
 	uint32_t ret;
 	rte_net_crc_handler f_handle;
 
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+	if (max_simd_bitwidth < RTE_MAX_128_SIMD &&
+			handlers != handlers_scalar)
+		rte_net_crc_set_alg(RTE_NET_CRC_SCALAR);
 	f_handle = handlers[type];
 	ret = f_handle(data, data_len);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 13/17] net/virtio: add checks for max SIMD bitwidth
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 13/17] net/virtio: " Ciara Power
@ 2020-08-31  2:39     ` Xia, Chenbo
  0 siblings, 0 replies; 276+ messages in thread
From: Xia, Chenbo @ 2020-08-31  2:39 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Maxime Coquelin, Wang, Zhihong

Hi Ciara,

Sorry for the late response and thanks for working on this! Currently the virtio
driver puts all vector-related conditions in dev_configure (virtio_dev_configure).
Do you think it's ok to move all of the code logic below into dev_configure?

Thanks!
Chenbo

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Friday, August 28, 2020 12:13 AM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; Xia, Chenbo <chenbo.xia@intel.com>; Wang,
> Zhihong <zhihong.wang@intel.com>
> Subject: [PATCH v2 13/17] net/virtio: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Cc: Chenbo Xia <chenbo.xia@intel.com>
> Cc: Zhihong Wang <zhihong.wang@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/virtio/virtio_ethdev.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index dc0093bdf0..f779ce8396 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1517,9 +1517,11 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
>  	if (vtpci_packed_queue(hw)) {
>  		PMD_INIT_LOG(INFO,
>  			"virtio: using packed ring %s Tx path on port %u",
> -			hw->use_vec_tx ? "vectorized" : "standard",
> +			(hw->use_vec_tx && rte_get_max_simd_bitwidth()
> +			> RTE_MAX_256_SIMD) ? "vectorized" : "standard",
>  			eth_dev->data->port_id);
> -		if (hw->use_vec_tx)
> +		if (hw->use_vec_tx && rte_get_max_simd_bitwidth()
> +				> RTE_MAX_256_SIMD)
>  			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
>  		else
>  			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
> @@ -1536,7 +1538,8 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
>  	}
> 
>  	if (vtpci_packed_queue(hw)) {
> -		if (hw->use_vec_rx) {
> +		if (hw->use_vec_rx && rte_get_max_simd_bitwidth()
> +				> RTE_MAX_256_SIMD) {
>  			PMD_INIT_LOG(INFO,
>  				"virtio: using packed ring vectorized Rx path on
> port %u",
>  				eth_dev->data->port_id);
> @@ -1555,7 +1558,8 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
>  			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
>  		}
>  	} else {
> -		if (hw->use_vec_rx) {
> +		if (hw->use_vec_rx && rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on
> port %u",
>  				eth_dev->data->port_id);
>  			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 17/17] net: add checks for max SIMD bitwidth
  2020-08-27 16:13   ` [dpdk-dev] [PATCH v2 17/17] net: " Ciara Power
@ 2020-09-02 11:02     ` Singh, Jasvinder
  0 siblings, 0 replies; 276+ messages in thread
From: Singh, Jasvinder @ 2020-09-02 11:02 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Olivier Matz



> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Thursday, August 27, 2020 5:13 PM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Singh, Jasvinder
> <jasvinder.singh@intel.com>; Olivier Matz <olivier.matz@6wind.com>
> Subject: [PATCH v2 17/17] net: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path. This check is
> done just before the handler is called, it cannot be done when setting the
> handlers initially as the EAL max simd bitwidth value has not yet been set.
> 
> Cc: Jasvinder Singh <jasvinder.singh@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_net/rte_net_crc.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index
> 9fd4794a9d..d3d3206919 100644
> --- a/lib/librte_net/rte_net_crc.c
> +++ b/lib/librte_net/rte_net_crc.c
> @@ -9,6 +9,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_common.h>
>  #include <rte_net_crc.h>
> +#include <rte_eal.h>
> 
>  #if defined(RTE_ARCH_X86_64) &&
> defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
>  #define X86_64_SSE42_PCLMULQDQ     1
> @@ -60,6 +61,8 @@ static rte_net_crc_handler handlers_neon[] = {  };
> #endif
> 
> +static uint16_t max_simd_bitwidth;
> +
>  /**
>   * Reflect the bits about the middle
>   *
> @@ -175,6 +178,11 @@ rte_net_crc_calc(const void *data,
>  	uint32_t ret;
>  	rte_net_crc_handler f_handle;
> 
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +	if (max_simd_bitwidth < RTE_MAX_128_SIMD &&
> +			handlers != handlers_scalar)
> +		rte_net_crc_set_alg(RTE_NET_CRC_SCALAR);


The above change doesn't seem right, as rte_net_crc_set_alg() is invoked every time the CRC is computed. It potentially adds branches at runtime. In my opinion, the bit width should be checked inside the rte_net_crc_set_alg() function, which is supposed to be used during the initialization stage after EAL sets the max SIMD bit width.

>  	f_handle = handlers[type];
>  	ret = f_handle(data, data_len);
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 02/17] eal: add default SIMD bitwidth values
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 02/17] eal: add default SIMD bitwidth values Ciara Power
@ 2020-09-04  5:30     ` Honnappa Nagarahalli
  0 siblings, 0 replies; 276+ messages in thread
From: Honnappa Nagarahalli @ 2020-09-04  5:30 UTC (permalink / raw)
  To: Ciara Power, dev
  Cc: Ruifeng Wang, jerinj, David Christensen, Jan Viktorin,
	Bruce Richardson, Konstantin Ananyev, nd, Honnappa Nagarahalli,
	nd

<snip>

> 
> Each arch has a define for the default SIMD bitwidth value, this is used on
> EAL init to set the config max SIMD bitwidth.
> 
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v2: Changed default bitwidth for Arm to 128.
Thanks for this change.
Continuing the discussion from V1, for SVE (Scalable Vector Extension - code is vector width agnostic, allowing the same binary to run on multiple platforms with different vector widths), I am thinking we should add a default value which we could use on Arm platforms to identify the choice.
I have added some comments in 1/17.

> ---
>  lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>  lib/librte_eal/common/eal_common_options.c | 3 +++
> lib/librte_eal/include/generic/rte_vect.h  | 2 ++
>  lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>  lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>  5 files changed, 11 insertions(+)
> 
> diff --git a/lib/librte_eal/arm/include/rte_vect.h
> b/lib/librte_eal/arm/include/rte_vect.h
> index 01c51712a1..2cd61d6279 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -14,6 +14,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 128
> +
>  typedef int32x4_t xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index 90f4e8f5c3..c2a9624f89 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -35,6 +35,7 @@
>  #ifndef RTE_EXEC_ENV_WINDOWS
>  #include <rte_telemetry.h>
>  #endif
> +#include <rte_vect.h>
> 
>  #include "eal_internal_cfg.h"
>  #include "eal_options.h"
> @@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config
> *internal_cfg)
>  	internal_cfg->user_mbuf_pool_ops_name = NULL;
>  	CPU_ZERO(&internal_cfg->ctrl_cpuset);
>  	internal_cfg->init_complete = 0;
> +	internal_cfg->max_simd_bitwidth.bitwidth =
> RTE_DEFAULT_SIMD_BITWIDTH;
> +	internal_cfg->max_simd_bitwidth.locked = 0;
>  }
> 
>  static int
> diff --git a/lib/librte_eal/include/generic/rte_vect.h
> b/lib/librte_eal/include/generic/rte_vect.h
> index 3fc47979f8..e98f184a97 100644
> --- a/lib/librte_eal/include/generic/rte_vect.h
> +++ b/lib/librte_eal/include/generic/rte_vect.h
> @@ -14,6 +14,8 @@
> 
>  #include <stdint.h>
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  /* Unsigned vector types */
> 
>  /**
> diff --git a/lib/librte_eal/ppc/include/rte_vect.h
> b/lib/librte_eal/ppc/include/rte_vect.h
> index b0545c878c..70fbd0c423 100644
> --- a/lib/librte_eal/ppc/include/rte_vect.h
> +++ b/lib/librte_eal/ppc/include/rte_vect.h
> @@ -15,6 +15,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef vector signed int xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/x86/include/rte_vect.h
> b/lib/librte_eal/x86/include/rte_vect.h
> index df5a607623..b1df75aca7 100644
> --- a/lib/librte_eal/x86/include/rte_vect.h
> +++ b/lib/librte_eal/x86/include/rte_vect.h
> @@ -35,6 +35,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef __m128i xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth Ciara Power
@ 2020-09-04  5:30     ` Honnappa Nagarahalli
  2020-09-04  8:45       ` Bruce Richardson
  2020-09-06 22:01     ` Ananyev, Konstantin
  1 sibling, 1 reply; 276+ messages in thread
From: Honnappa Nagarahalli @ 2020-09-04  5:30 UTC (permalink / raw)
  To: Ciara Power, dev; +Cc: Ray Kinsella, Neil Horman, nd, Honnappa Nagarahalli, nd

<snip>

> 
> This patch adds a max SIMD bitwidth EAL configuration. The API allows for an
> app to set this value. It can also be set using EAL argument --force-max-simd-
> bitwidth, which will lock the value and override any modifications made by
> the app.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v2: Added to Doxygen comment for API.
> ---
>  lib/librte_eal/common/eal_common_options.c | 60
> ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/rte_eal.h           | 32 ++++++++++++
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  5 files changed, 106 insertions(+)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index a5426e1234..90f4e8f5c3 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -102,6 +102,7 @@ eal_long_options[] = {
>  	{OPT_MATCH_ALLOCATIONS, 0, NULL,
> OPT_MATCH_ALLOCATIONS_NUM},
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL,
> +OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>  	{0,                     0, NULL, 0                        }
>  };
> 
> @@ -1309,6 +1310,32 @@ eal_parse_iova_mode(const char *name)
>  	return 0;
>  }
> 
> +static int
> +eal_parse_simd_bitwidth(const char *arg, bool locked) {
> +	char *end;
> +	uint16_t bitwidth;
> +	int ret;
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +
> +	if (arg == NULL || arg[0] == '\0')
> +		return -1;
> +
> +	errno = 0;
> +	bitwidth = strtoul(arg, &end, 0);
> +
> +	/* check for errors */
> +	if ((errno != 0) || end == NULL || (*end != '\0'))
> +		return -1;
> +
> +	ret = rte_set_max_simd_bitwidth(bitwidth);
> +	if (ret < 0)
> +		return -1;
> +	internal_conf->max_simd_bitwidth.locked = locked;
> +	return 0;
> +}
> +
>  static int
>  eal_parse_base_virtaddr(const char *arg)  { @@ -1707,6 +1734,13 @@
> eal_parse_common_option(int opt, const char *optarg,
>  	case OPT_NO_TELEMETRY_NUM:
>  		conf->no_telemetry = 1;
>  		break;
> +	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> +		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +					OPT_FORCE_MAX_SIMD_BITWIDTH
> "\n");
> +			return -1;
> +		}
> +		break;
> 
>  	/* don't know what to do, leave this to caller */
>  	default:
> @@ -1903,6 +1937,31 @@ eal_check_common_options(struct
> internal_config *internal_cfg)
>  	return 0;
>  }
> 
> +uint16_t
> +rte_get_max_simd_bitwidth(void)
> +{
> +	const struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	return internal_conf->max_simd_bitwidth.bitwidth;
> +}
> +
> +int
> +rte_set_max_simd_bitwidth(uint16_t bitwidth) {
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	if (internal_conf->max_simd_bitwidth.locked) {
> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user
> runtime override enabled");
> +		return -EPERM;
> +	}
> +	if (bitwidth < RTE_NO_SIMD || !rte_is_power_of_2(bitwidth)) {
> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> +		return -EINVAL;
> +	}
> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> +	return 0;
> +}
> +
>  void
>  eal_common_usage(void)
>  {
> @@ -1981,6 +2040,7 @@ eal_common_usage(void)
>  	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
>  	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by
> default)\n"
>  	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> +	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD
> bitwidth\n"
>  	       "\nEAL options for DEBUG use only:\n"
>  	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
>  	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h
> b/lib/librte_eal/common/eal_internal_cfg.h
> index 13f93388a7..367e0cc19e 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -33,6 +33,12 @@ struct hugepage_info {
>  	int lock_descriptor;    /**< file descriptor for hugepage dir */
>  };
> 
> +struct simd_bitwidth {
> +	/**< flag indicating if bitwidth is locked from further modification */
> +	bool locked;
> +	uint16_t bitwidth; /**< bitwidth value */ };
> +
>  /**
>   * internal configuration
>   */
> @@ -85,6 +91,8 @@ struct internal_config {
>  	volatile unsigned int init_complete;
>  	/**< indicates whether EAL has completed initialization */
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
> +	/** max simd bitwidth path to use */
> +	struct simd_bitwidth max_simd_bitwidth;
>  };
> 
>  void eal_reset_internal_config(struct internal_config *internal_cfg); diff --git
> a/lib/librte_eal/common/eal_options.h
> b/lib/librte_eal/common/eal_options.h
> index 89769d48b4..ef33979664 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -85,6 +85,8 @@ enum {
>  	OPT_TELEMETRY_NUM,
>  #define OPT_NO_TELEMETRY      "no-telemetry"
>  	OPT_NO_TELEMETRY_NUM,
> +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> +	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
>  	OPT_LONG_MAX_NUM
>  };
> 
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..8148f650f2 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,13 @@ enum rte_proc_type_t {
>  	RTE_PROC_INVALID
>  };
> 
> +enum rte_max_simd_t {
We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to choose SVE.

> +	RTE_NO_SIMD = 64,
> +	RTE_MAX_128_SIMD = 128,
> +	RTE_MAX_256_SIMD = 256,
> +	RTE_MAX_512_SIMD = 512
> +};
> +
>  /**
>   * Get the process type in a multi-process setup
>   *
> @@ -51,6 +58,31 @@ enum rte_proc_type_t {
>   */
>  enum rte_proc_type_t rte_eal_process_type(void);
> 
> +/**
> + * Get the supported SIMD bitwidth.
> + *
> + * @return
> + *   uint16_t bitwidth.
> + */
> +__rte_experimental
> +uint16_t rte_get_max_simd_bitwidth(void);
> +
> +/**
> + * Set the supported SIMD bitwidth.
> + * This API should only be called once at initialization, before EAL init.
> + *
> + * @param bitwidth
> + *   uint16_t bitwidth.
> + * @return
> + *   0 on success.
> + * @return
> + *   -EINVAL on invalid bitwidth parameter.
> + * @return
> + *   -EPERM if bitwidth is locked.
> + */
> +__rte_experimental
> +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> +
>  /**
>   * Request iopl privilege for all RPL.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map
> b/lib/librte_eal/rte_eal_version.map
> index bf0c17c233..8059ea76b6 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -403,6 +403,10 @@ EXPERIMENTAL {
>  	rte_mp_disable;
>  	rte_thread_register;
>  	rte_thread_unregister;
> +
> +	# added in 20.11
> +	rte_get_max_simd_bitwidth;
> +	rte_set_max_simd_bitwidth;
>  };
> 
>  INTERNAL {
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-09-04  5:30     ` Honnappa Nagarahalli
@ 2020-09-04  8:45       ` Bruce Richardson
  2020-09-09 19:30         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-09-04  8:45 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: Ciara Power, dev, Ray Kinsella, Neil Horman, nd

On Fri, Sep 04, 2020 at 05:30:28AM +0000, Honnappa Nagarahalli wrote:
> <snip>
> 
 > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> > index ddcf6a2e7a..8148f650f2 100644
> > --- a/lib/librte_eal/include/rte_eal.h
> > +++ b/lib/librte_eal/include/rte_eal.h
> > @@ -43,6 +43,13 @@ enum rte_proc_type_t {
> >  	RTE_PROC_INVALID
> >  };
> > 
> > +enum rte_max_simd_t {
> We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to choose SVE.
> 

Is zero the best value for this? Would setting it to MAX_INT or some
other big number be better, in terms of comparison operations, or does
that just not apply at all with SVE?

> > +	RTE_NO_SIMD = 64,
> > +	RTE_MAX_128_SIMD = 128,
> > +	RTE_MAX_256_SIMD = 256,
> > +	RTE_MAX_512_SIMD = 512
> > +};
> > +
 

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth Ciara Power
  2020-09-04  5:30     ` Honnappa Nagarahalli
@ 2020-09-06 22:01     ` Ananyev, Konstantin
  1 sibling, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-09-06 22:01 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Power, Ciara, Ray Kinsella, Neil Horman

> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> for an app to set this value. It can also be set using EAL argument
> --force-max-simd-bitwidth, which will lock the value and override any
> modifications made by the app.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v2: Added to Doxygen comment for API.
> ---
>  lib/librte_eal/common/eal_common_options.c | 60 ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/rte_eal.h           | 32 ++++++++++++
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  5 files changed, 106 insertions(+)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index a5426e1234..90f4e8f5c3 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -102,6 +102,7 @@ eal_long_options[] = {
>  	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>  	{0,                     0, NULL, 0                        }
>  };
> 
> @@ -1309,6 +1310,32 @@ eal_parse_iova_mode(const char *name)
>  	return 0;
>  }
> 
> +static int
> +eal_parse_simd_bitwidth(const char *arg, bool locked)
> +{
> +	char *end;
> +	uint16_t bitwidth;
> +	int ret;
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +
> +	if (arg == NULL || arg[0] == '\0')
> +		return -1;
> +
> +	errno = 0;
> +	bitwidth = strtoul(arg, &end, 0);

As far as I can see, with that assignment you'll lose any high bits that are set.
So, --force-max-simd-bitwidth=0xf0080
wouldn't report any error, while it probably should.
Something like this would probably be a better way:
unsigned long t;
...
t = strtoul(arg, &end, 0);
if (t > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
	return -1;
ret = rte_set_max_simd_bitwidth(t);

> +
> +	/* check for errors */
> +	if ((errno != 0) || end == NULL || (*end != '\0'))
> +		return -1;
> +
> +	ret = rte_set_max_simd_bitwidth(bitwidth);
> +	if (ret < 0)
> +		return -1;
> +	internal_conf->max_simd_bitwidth.locked = locked;
> +	return 0;
> +}
> +

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth Ciara Power
@ 2020-09-06 22:20     ` Ananyev, Konstantin
  2020-09-07  8:44       ` Bruce Richardson
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-09-06 22:20 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: Power, Ciara, Burakov, Anatoly, Mcnamara, John, Kovacevic, Marko

> This patch adds documentation on the usage of the max SIMD bitwidth EAL
> setting, and how to use it to enable AVX-512 at runtime.
> 
> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> Cc: John McNamara <john.mcnamara@intel.com>
> Cc: Marko Kovacevic <marko.kovacevic@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
>  doc/guides/linux_gsg/eal_args.include.rst     | 12 +++++++
>  .../prog_guide/env_abstraction_layer.rst      | 31 ++++++++++++++++
>  3 files changed, 79 insertions(+)
>  create mode 100644 doc/guides/howto/avx512.rst
> 
> diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
> new file mode 100644
> index 0000000000..ebae0f2b4f
> --- /dev/null
> +++ b/doc/guides/howto/avx512.rst
> @@ -0,0 +1,36 @@
> +..  SPDX-License-Identifier: BSD-3-Clause
> +    Copyright(c) 2020 Intel Corporation.
> +
> +
> +Using AVX-512 with DPDK
> +=======================
> +
> +AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of EAL API,
> +and by the user with a commandline argument. DPDK has a setting for max SIMD bitwidth,
> +which can be modified and will then limit the vector path taken by the code.

It is a good idea to have such an ability,
though just one global variable for all DPDK libs/drivers
seems a bit coarse to me.
Let's say we have 2 libs: libA and libB.
Both have an RTE_MAX_512_SIMD-specific code path,
though libA would cause a frequency level change, while libB wouldn't.
So the user (to avoid the frequency level change) would have to block
512_SIMD for both libs.
I think it would be much better to follow the strategy we use for log-level:
there is a global simd_width, but each DPDK entity (lib/driver) also has
its own simd_width that overrules the global one (more fine-grained control).

> +
> +
> +Using the API in apps
> +---------------------
> +
> +Apps can request DPDK uses AVX-512 at runtime, if it provides improved application performance.
> +This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
> +which does not allow for AVX-512.
> +
> +.. code-block:: c
> +
> +   rte_set_max_simd_bitwidth(RTE_MAX_512_SIMD);
> +
> +This API should only be called once at initialization, before EAL init.

If the only possible usage scenario for that function is init time before  EAL init,
then do we really need it at all?
As we have cmd-line flag anyway?
User can achieve similar goal, by just:  rte_eal_init(,..."--force-max-simd-bitwidth=..."...); 

> +For more information on the possible enum values to use as a parameter, go to :ref:`max_simd_bitwidth`:
> +
> +
> +Using the command-line argument
> +---------------------------------------------
> +
> +The user can select to use AVX-512 at runtime, using the following argument to set the max bitwidth::
> +
> +   ./app/dpdk-testpmd --force-max-simd-bitwidth=512
> +
> +This will override any further changes to the max SIMD bitwidth in DPDK,
> +which is useful for testing purposes.
> diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
> index 0fe4457968..bab3e14e47 100644
> --- a/doc/guides/linux_gsg/eal_args.include.rst
> +++ b/doc/guides/linux_gsg/eal_args.include.rst
> @@ -210,3 +210,15 @@ Other options
>  *    ``--no-telemetry``:
> 
>      Disable telemetry.
> +
> +*    ``--force-max-simd-bitwidth=<val>``:
> +
> +    Specify the maximum SIMD bitwidth size to handle. This limits which vector paths,
> +    if any, are taken, as any paths taken must use a bitwidth below the max bitwidth limit.
> +    For example, to allow all SIMD bitwidths up to and including AVX-512::
> +
> +        --force-max-simd-bitwidth=512
> +
> +    The following example shows limiting the bitwidth to 64-bits to disable all vector code::
> +
> +        --force-max-simd-bitwidth=64
> diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
> index f64ae953d1..74f26ed6c9 100644
> --- a/doc/guides/prog_guide/env_abstraction_layer.rst
> +++ b/doc/guides/prog_guide/env_abstraction_layer.rst
> @@ -486,6 +486,37 @@ the desired addressing mode when virtual devices that are not directly attached
>  To facilitate forcing the IOVA mode to a specific value the EAL command line option ``--iova-mode`` can
>  be used to select either physical addressing('pa') or virtual addressing('va').
> 
> +.. _max_simd_bitwidth:
> +
> +
> +Max SIMD bitwidth
> +~~~~~~~~~~~~~~~~~
> +
> +The EAL provides a single setting to limit the max SIMD bitwidth used by DPDK,
> +which is used in determining the vector path, if any, chosen by a component.
> +The value can be set at runtime by an application using the 'rte_set_max_simd_bitwidth(uint16_t bitwidth)' function,
> +which should only be called once at initialization, before EAL init.
> +The value can be overridden by the user using the EAL command-line option '--force-max-simd-bitwidth'.
> +
> +When choosing a vector path, along with checking the CPU feature support,
> +the value of the max SIMD bitwidth must also be checked, and can be retrieved using the 'rte_get_max_simd_bitwidth()' function.
> +The value should be compared against the enum values for accepted max SIMD bitwidths:
> +
> +.. code-block:: c
> +
> +   enum rte_max_simd_t {
> +       RTE_NO_SIMD = 64,
> +       RTE_MAX_128_SIMD = 128,
> +       RTE_MAX_256_SIMD = 256,
> +       RTE_MAX_512_SIMD = 512
> +   };
> +
> +    if (rte_get_max_simd_bitwidth() >= RTE_MAX_512_SIMD)
> +        /* Take AVX-512 vector path */
> +    else if (rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
> +        /* Take AVX2 vector path */
> +
> +
>  Memory Segments and Memory Zones (memzone)
>  ------------------------------------------
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth
  2020-09-06 22:20     ` Ananyev, Konstantin
@ 2020-09-07  8:44       ` Bruce Richardson
  2020-09-07 12:01         ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-09-07  8:44 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: Power, Ciara, dev, Burakov, Anatoly, Mcnamara, John, Kovacevic, Marko

On Sun, Sep 06, 2020 at 10:20:30PM +0000, Ananyev, Konstantin wrote:
> > This patch adds documentation on the usage of the max SIMD bitwidth EAL
> > setting, and how to use it to enable AVX-512 at runtime.
> > 
> > Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> > Cc: John McNamara <john.mcnamara@intel.com>
> > Cc: Marko Kovacevic <marko.kovacevic@intel.com>
> > 
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > ---
> >  doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
> >  doc/guides/linux_gsg/eal_args.include.rst     | 12 +++++++
> >  .../prog_guide/env_abstraction_layer.rst      | 31 ++++++++++++++++
> >  3 files changed, 79 insertions(+)
> >  create mode 100644 doc/guides/howto/avx512.rst
> > 
> > diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
> > new file mode 100644
> > index 0000000000..ebae0f2b4f
> > --- /dev/null
> > +++ b/doc/guides/howto/avx512.rst
> > @@ -0,0 +1,36 @@
> > +..  SPDX-License-Identifier: BSD-3-Clause
> > +    Copyright(c) 2020 Intel Corporation.
> > +
> > +
> > +Using AVX-512 with DPDK
> > +=======================
> > +
> > +AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of EAL API,
> > +and by the user with a commandline argument. DPDK has a setting for max SIMD bitwidth,
> > +which can be modified and will then limit the vector path taken by the code.
> 
> It's is a good idea to have such ability,
> though just one global variable for all DPDK lib/drivers
> seems a bit coarse to me.
> Let say we have 2 libs: libA and libB.
> Both do have RTE_MAX_512_SIMD specific code-path,
> though libA  would cause frequency level change, while libB wouldn't.
> So user (to avoid frequency level change) would have to block
> 512_SIMD for both libs.
> I think it would be much better to follow the strategy we use for log-level:
> there is a global simd_width, but each DDPK entity (lib/driver) also has   
> it's own simd_width that overrules a global one (more fine-grained control).

That for me is a nightmare scenario. How is the user meant to know what
libs could cause him a frequency change or not, or is he meant to determine that
empirically by trial and error on each platform? This scenario is
completely unlike logging in that it's non-obvious to the user, and so
needs to be kept as consumable as possible to the app-developer and the
user. Unless we find a concrete scenario where having a single switch is
causing real user problems, I'd much rather keep things simple. See also
answer below, where I point out that the main target of this is developers,
who can use this flag to indicate what vector bitwidth their app uses, and
then allow DPDK to match that.

> 
> > +
> > +
> > +Using the API in apps
> > +---------------------
> > +
> > +Apps can request DPDK uses AVX-512 at runtime, if it provides improved application performance.
> > +This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
> > +which does not allow for AVX-512.
> > +
> > +.. code-block:: c
> > +
> > +   rte_set_max_simd_bitwidth(RTE_MAX_512_SIMD);
> > +
> > +This API should only be called once at initialization, before EAL init.
> 
> If the only possible usage scenario for that function is init time before  EAL init,
> then do we really need it at all?
> As we have cmd-line flag anyway?
> User can achieve similar goal, by just:  rte_eal_init(,..."--force-max-simd-bitwidth=..."...); 

Ideally, the user should never know or care about the cmdline flag, it's
only for testing. The main criteria for allowing DPDK to use longer
instruction sets is whether the application itself will similarly use them,
and that's something for the programmer to do. Having the programmer muck
about with cmdline arguments is less than ideal, so a proper API is
warranted here. The reason for the note about EAL init is that we don't
want libraries to have to check the max bitwidth each time an API is
called, so we want to have a way to prevent people changing things at
runtime. This therefore seemed simplest.

/Bruce

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 03/17] doc: add detail on using max SIMD bitwidth
  2020-09-07  8:44       ` Bruce Richardson
@ 2020-09-07 12:01         ` Ananyev, Konstantin
  0 siblings, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-09-07 12:01 UTC (permalink / raw)
  To: Richardson, Bruce
  Cc: Power, Ciara, dev, Burakov, Anatoly, Mcnamara, John, Kovacevic, Marko



> On Sun, Sep 06, 2020 at 10:20:30PM +0000, Ananyev, Konstantin wrote:
> > > This patch adds documentation on the usage of the max SIMD bitwidth EAL
> > > setting, and how to use it to enable AVX-512 at runtime.
> > >
> > > Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> > > Cc: John McNamara <john.mcnamara@intel.com>
> > > Cc: Marko Kovacevic <marko.kovacevic@intel.com>
> > >
> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > ---
> > >  doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
> > >  doc/guides/linux_gsg/eal_args.include.rst     | 12 +++++++
> > >  .../prog_guide/env_abstraction_layer.rst      | 31 ++++++++++++++++
> > >  3 files changed, 79 insertions(+)
> > >  create mode 100644 doc/guides/howto/avx512.rst
> > >
> > > diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
> > > new file mode 100644
> > > index 0000000000..ebae0f2b4f
> > > --- /dev/null
> > > +++ b/doc/guides/howto/avx512.rst
> > > @@ -0,0 +1,36 @@
> > > +..  SPDX-License-Identifier: BSD-3-Clause
> > > +    Copyright(c) 2020 Intel Corporation.
> > > +
> > > +
> > > +Using AVX-512 with DPDK
> > > +=======================
> > > +
> > > +AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of EAL API,
> > > +and by the user with a commandline argument. DPDK has a setting for max SIMD bitwidth,
> > > +which can be modified and will then limit the vector path taken by the code.
> >
> > It's is a good idea to have such ability,
> > though just one global variable for all DPDK lib/drivers
> > seems a bit coarse to me.
> > Let say we have 2 libs: libA and libB.
> > Both do have RTE_MAX_512_SIMD specific code-path,
> > though libA  would cause frequency level change, while libB wouldn't.
> > So user (to avoid frequency level change) would have to block
> > 512_SIMD for both libs.
> > I think it would be much better to follow the strategy we use for log-level:
> > there is a global simd_width, but each DDPK entity (lib/driver) also has
> > it's own simd_width that overrules a global one (more fine-grained control).
> 
> That for me is a nightmare scenario. How is the user meant to know what
> libs could cause him a frequency or not, or is he meant to determine that
> empirically by trial and error on each platform? 

I suppose yes.
Let's say the user can try to run the app with global
--force-max-simd-bitwidth=256 and --force-max-simd-bitwidth=512
and check the difference.
If he is happy with the performance he gets, he can stick with one of the global values (256/512).
If not, he can go further and choose a different max-simd-width for different components.

>This scenario is
> completely unlike logging in that it's non-obvious to the user, and so
> needs to be kept as consumable as possible to the app-developer and the
> user.

This feature is totally optional, if user feels like he doesn't need to care about it,
he can simply ignore it and use default values.
Though for those who do care, one global value seems too restrictive.

> Unless we find a concrete scenario where having a single switch is
> causing real user problems, I'd much rather keep things simple.

As an example, I run several perf tests with acl avx512 code path and
so far didn't see any switches to CORE_POWER.LVL2_TURBO_LICENSE
(heavy AVX512 instructions).
I presume there might be other light-weight avx512 codepaths (lpm, etc.).
Though for crypto cpu PMDs (aesni-mb, etc.) I think it would cause switch
to the LVL2.

> See also answer below, where I point out that the main target of this is developers,
> who can use this flag to indicate what vector bitwidth their app uses,
> and then allow DPDK to match that.

But in the majority of cases the developer doesn't know for sure on what platform his app will run
(except in the quite rare situation where the app is developed for one particular platform).
Again, for complex/multi-purpose applications (like VPP, DPDK-OVS) the developer can't even
always predict which modules will be used and which won't.
Again, the app can be configured in a way that different modules run on different cores
(let's say a module that does ACL lookup on core X, and a module that does the actual crypto on core Y).
All that depends on the particular deployment scenario.
So in many cases only the end user has all the information to decide what max-simd width will be optimal.

> 
> >
> > > +
> > > +
> > > +Using the API in apps
> > > +---------------------
> > > +
> > > +Apps can request DPDK uses AVX-512 at runtime, if it provides improved application performance.
> > > +This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
> > > +which does not allow for AVX-512.
> > > +
> > > +.. code-block:: c
> > > +
> > > +   rte_set_max_simd_bitwidth(RTE_MAX_512_SIMD);
> > > +
> > > +This API should only be called once at initialization, before EAL init.
> >
> > If the only possible usage scenario for that function is init time before  EAL init,
> > then do we really need it at all?
> > As we have cmd-line flag anyway?
> > User can achieve similar goal, by just:  rte_eal_init(,..."--force-max-simd-bitwidth=..."...);
> 
> Ideally, the user should never know or care about the cmdline flag, it's
> only for testing. The main criteria for allowing DPDK to use longer
> instruction sets is whether the application itself will similarly use them,
> and that's something for the programmer to do.

Unfortunately, I don't think the programmer has all the information to make such decisions either.
A lot depends on deployment scenarios, see above.
 
> Having the programmer muck
> about with cmdline arguments is less than ideal, so a proper API is
> warrented here. 

Agree, function call is more convenient for the developer.

>The reason for the note about EAL init, is that we don't
> want libraries to have to check the max bitwidth each time an API is
> called, so we want to have a way to prevent people changing things at
> runtime. This therefore seemed simplest.

I understand that, but for that purpose just the cmd-line flag is enough,
that's why I asked whether we need an API call at all.
It seems a bit strange to me to introduce an API that is supposed to be called
only *before* eal_init(), but on the other hand I don't see much harm in it either.
So if you and the other guys still prefer to keep it - ok by me.
Konstantin
 





^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-09-04  8:45       ` Bruce Richardson
@ 2020-09-09 19:30         ` Honnappa Nagarahalli
  2020-09-17 16:31           ` Kinsella, Ray
  0 siblings, 1 reply; 276+ messages in thread
From: Honnappa Nagarahalli @ 2020-09-09 19:30 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Ciara Power, dev, Ray Kinsella, Neil Horman, nd,
	Honnappa Nagarahalli, nd

<snip>

> >
>  > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> > > index ddcf6a2e7a..8148f650f2 100644
> > > --- a/lib/librte_eal/include/rte_eal.h
> > > +++ b/lib/librte_eal/include/rte_eal.h
> > > @@ -43,6 +43,13 @@ enum rte_proc_type_t {
> > >  	RTE_PROC_INVALID
> > >  };
> > >
> > > +enum rte_max_simd_t {
> > We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to choose
> SVE.
> >
> 
> Is zero the best value for this? Would setting it to MAX_INT or some other big
> number be better, in terms of comparisons operations, or does that just not
> apply at all with SVE?
I suggested zero as the bitwidth can be specified from the command line. It would be much easier to input zero vs other number.

> 
> > > +	RTE_NO_SIMD = 64,
> > > +	RTE_MAX_128_SIMD = 128,
> > > +	RTE_MAX_256_SIMD = 256,
> > > +	RTE_MAX_512_SIMD = 512
> > > +};
> > > +
> 

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-09-09 19:30         ` Honnappa Nagarahalli
@ 2020-09-17 16:31           ` Kinsella, Ray
  2020-09-17 16:43             ` Bruce Richardson
  2020-09-18  2:13             ` Honnappa Nagarahalli
  0 siblings, 2 replies; 276+ messages in thread
From: Kinsella, Ray @ 2020-09-17 16:31 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Bruce Richardson; +Cc: Ciara Power, dev, Neil Horman, nd



On 09/09/2020 20:30, Honnappa Nagarahalli wrote:
> <snip>
> 
>>>
>>  > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
>>>> index ddcf6a2e7a..8148f650f2 100644
>>>> --- a/lib/librte_eal/include/rte_eal.h
>>>> +++ b/lib/librte_eal/include/rte_eal.h
>>>> @@ -43,6 +43,13 @@ enum rte_proc_type_t {
>>>>  	RTE_PROC_INVALID
>>>>  };
>>>>
>>>> +enum rte_max_simd_t {
>>> We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to choose
>> SVE.
>>>
>>
>> Is zero the best value for this? Would setting it to MAX_INT or some other big
>> number be better, in terms of comparisons operations, or does that just not
>> apply at all with SVE?
> I suggested zero as the bitwidth can be specified from the command line. It would be much easier to input zero vs other number.

Right, but it doesn't end up being that intuitive as an interface:
0 is enabled, 64 is not, 128 is enabled etc ....

Suggest we use the max 16-bit integer, i.e. 0xFFFF?

> 
>>
>>>> +	RTE_NO_SIMD = 64,
>>>> +	RTE_MAX_128_SIMD = 128,
>>>> +	RTE_MAX_256_SIMD = 256,
>>>> +	RTE_MAX_512_SIMD = 512
>>>> +};
>>>> +
>>

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-09-17 16:31           ` Kinsella, Ray
@ 2020-09-17 16:43             ` Bruce Richardson
  2020-09-18  2:13             ` Honnappa Nagarahalli
  1 sibling, 0 replies; 276+ messages in thread
From: Bruce Richardson @ 2020-09-17 16:43 UTC (permalink / raw)
  To: Kinsella, Ray; +Cc: Honnappa Nagarahalli, Ciara Power, dev, Neil Horman, nd

On Thu, Sep 17, 2020 at 05:31:52PM +0100, Kinsella, Ray wrote:
> 
> 
> On 09/09/2020 20:30, Honnappa Nagarahalli wrote:
> > <snip>
> > 
> >>>
> >>  > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> >>>> index ddcf6a2e7a..8148f650f2 100644
> >>>> --- a/lib/librte_eal/include/rte_eal.h
> >>>> +++ b/lib/librte_eal/include/rte_eal.h
> >>>> @@ -43,6 +43,13 @@ enum rte_proc_type_t {
> >>>>  	RTE_PROC_INVALID
> >>>>  };
> >>>>
> >>>> +enum rte_max_simd_t {
> >>> We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to choose
> >> SVE.
> >>>
> >>
> >> Is zero the best value for this? Would setting it to MAX_INT or some other big
> >> number be better, in terms of comparisons operations, or does that just not
> >> apply at all with SVE?
> > I suggested zero as the bitwidth can be specified from the command line. It would be much easier to input zero vs other number.
> 
> Right, but it doesn't end up being that intuitive as interface 
> 0 is enabled, 64 is not, 128 is enabled etc .... 
> 
> Suggest we use a max 16bit integer as 0xFFFF?
> 
I can actually see 0 on command-line as being "unlimited", but for the APIs
and internally, I think that it should be converted to a MAX_INT value so
that the comparisons don't need to special-case zero. I agree with
Honnappa, that a -1 or maxint value is awkward on commandline, but
internally it's just an enum, so we can set it to whatever the most
practical value is.

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-09-17 16:31           ` Kinsella, Ray
  2020-09-17 16:43             ` Bruce Richardson
@ 2020-09-18  2:13             ` Honnappa Nagarahalli
  2020-09-18  8:35               ` Bruce Richardson
  1 sibling, 1 reply; 276+ messages in thread
From: Honnappa Nagarahalli @ 2020-09-18  2:13 UTC (permalink / raw)
  To: Kinsella, Ray, Bruce Richardson
  Cc: Ciara Power, dev, Neil Horman, nd, Honnappa Nagarahalli, nd

<snip>

> >
> >>>
> >>  > diff --git a/lib/librte_eal/include/rte_eal.h
> >> b/lib/librte_eal/include/rte_eal.h
> >>>> index ddcf6a2e7a..8148f650f2 100644
> >>>> --- a/lib/librte_eal/include/rte_eal.h
> >>>> +++ b/lib/librte_eal/include/rte_eal.h
> >>>> @@ -43,6 +43,13 @@ enum rte_proc_type_t {
> >>>>  	RTE_PROC_INVALID
> >>>>  };
> >>>>
> >>>> +enum rte_max_simd_t {
> >>> We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to
> >>> choose
> >> SVE.
> >>>
> >>
> >> Is zero the best value for this? Would setting it to MAX_INT or some
> >> other big number be better, in terms of comparisons operations, or
> >> does that just not apply at all with SVE?
> > I suggested zero as the bitwidth can be specified from the command line. It
> would be much easier to input zero vs other number.
> 
> Right, but it doesn't end up being that intuitive as interface
> 0 is enabled, 64 is not, 128 is enabled etc ....
> 
> Suggest we use a max 16bit integer as 0xFFFF?
I think there are 2 things here:
1) What is the internal representation (for ex: the value of the enum)? Here assigning 0xFFFF should be fine.
2) The input value at the command line. Is it possible to say that, if the user does not provide anything, then we set the option as 0xFFFF? This would mean that SVE would be used by default on Arm platforms (which is ok for me).

> 
> >
> >>
> >>>> +	RTE_NO_SIMD = 64,
> >>>> +	RTE_MAX_128_SIMD = 128,
> >>>> +	RTE_MAX_256_SIMD = 256,
> >>>> +	RTE_MAX_512_SIMD = 512
> >>>> +};
> >>>> +
> >>

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 01/17] eal: add max SIMD bitwidth
  2020-09-18  2:13             ` Honnappa Nagarahalli
@ 2020-09-18  8:35               ` Bruce Richardson
  0 siblings, 0 replies; 276+ messages in thread
From: Bruce Richardson @ 2020-09-18  8:35 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: Kinsella, Ray, Ciara Power, dev, Neil Horman, nd

On Fri, Sep 18, 2020 at 02:13:02AM +0000, Honnappa Nagarahalli wrote:
> <snip>
> 
> > >
> > >>>
> > >>  > diff --git a/lib/librte_eal/include/rte_eal.h
> > >> b/lib/librte_eal/include/rte_eal.h
> > >>>> index ddcf6a2e7a..8148f650f2 100644
> > >>>> --- a/lib/librte_eal/include/rte_eal.h
> > >>>> +++ b/lib/librte_eal/include/rte_eal.h
> > >>>> @@ -43,6 +43,13 @@ enum rte_proc_type_t {
> > >>>>  	RTE_PROC_INVALID
> > >>>>  };
> > >>>>
> > >>>> +enum rte_max_simd_t {
> > >>> We could add a RTE_MAX_SIMD = 0. Arm platforms can use this to
> > >>> choose
> > >> SVE.
> > >>>
> > >>
> > >> Is zero the best value for this? Would setting it to MAX_INT or some
> > >> other big number be better, in terms of comparisons operations, or
> > >> does that just not apply at all with SVE?
> > > I suggested zero as the bitwidth can be specified from the command line. It
> > would be much easier to input zero vs other number.
> > 
> > Right, but it doesn't end up being that intuitive as interface
> > 0 is enabled, 64 is not, 128 is enabled etc ....
> > 
> > Suggest we use a max 16bit integer as 0xFFFF?
> I think there are 2 things here:
> 1) What is the internal representation (for ex: the value of the enum)? Here assigning 0xFFFF should be fine.
> 2) The input value at the command line. Is it possible to say that, if the user does not provide anything, then we set the option as 0xFFFF? This would mean that SVE would be used by default on Arm platforms (which is ok for me).
>

Makes sense. That is all perfectly doable because the initial default value
is set per architecture.

^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (14 preceding siblings ...)
  2020-08-27 16:12 ` [dpdk-dev] [PATCH v2 00/17] " Ciara Power
@ 2020-09-30 13:03 ` Ciara Power
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
                     ` (17 more replies)
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                   ` (5 subsequent siblings)
  21 siblings, 18 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:03 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power

A number of components in DPDK have optional AVX-512 or other vector
code paths which can be selected at runtime. Rather than having each
component provide its own mechanism to select a code path, this patchset
adds support for a single setting to control what code paths are used.
This can be used to enable some non-default code paths e.g. ones using
AVX-512, but also to limit the code paths to certain vector widths, or
to scalar code only, which is useful for testing.

The max SIMD bitwidth setting can be set by the app itself through use of
the available API, or can be overridden by a commandline argument passed by
the user.

v3:
 - Added patch to add check for LPM lib
 - Modified default max bitwidth for Arm to disable max SIMD bitwidth,
   which will allow for SVE.
 - Added "0" as an acceptable value for command-line flag, which internally
   is used as UINT16_MAX to essentially disable max SIMD bitwidth limits.
 - Made suggested changes to net lib patch.
 - Rebased onto main.
v2:
  - Added some documentation.
  - Modified default max bitwidth for Arm.
  - Moved mlx5 condition check into existing check vec support function.
  - Added max SIMD bitwidth checks to some libraries.

Ciara Power (18):
  eal: add max SIMD bitwidth
  eal: add default SIMD bitwidth values
  doc: add detail on using max SIMD bitwidth
  net/i40e: add checks for max SIMD bitwidth
  net/axgbe: add checks for max SIMD bitwidth
  net/bnxt: add checks for max SIMD bitwidth
  net/enic: add checks for max SIMD bitwidth
  net/fm10k: add checks for max SIMD bitwidth
  net/iavf: add checks for max SIMD bitwidth
  net/ice: add checks for max SIMD bitwidth
  net/ixgbe: add checks for max SIMD bitwidth
  net/mlx5: add checks for max SIMD bitwidth
  net/virtio: add checks for max SIMD bitwidth
  distributor: add checks for max SIMD bitwidth
  member: add checks for max SIMD bitwidth
  efd: add checks for max SIMD bitwidth
  net: add checks for max SIMD bitwidth
  lpm: choose vector path at runtime

 doc/guides/howto/avx512.rst                   | 36 ++++++++++
 doc/guides/howto/index.rst                    |  1 +
 doc/guides/linux_gsg/eal_args.include.rst     | 16 +++++
 .../prog_guide/env_abstraction_layer.rst      | 32 +++++++++
 drivers/net/axgbe/axgbe_rxtx.c                |  3 +-
 drivers/net/bnxt/bnxt_ethdev.c                |  6 +-
 drivers/net/enic/enic_rxtx_vec_avx2.c         |  3 +-
 drivers/net/fm10k/fm10k_ethdev.c              | 11 ++-
 drivers/net/i40e/i40e_rxtx.c                  | 19 ++++--
 drivers/net/iavf/iavf_rxtx.c                  | 16 +++--
 drivers/net/ice/ice_rxtx.c                    | 20 ++++--
 drivers/net/ixgbe/ixgbe_rxtx.c                |  7 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c              |  2 +
 drivers/net/virtio/virtio_ethdev.c            |  9 ++-
 lib/librte_distributor/rte_distributor.c      |  3 +-
 lib/librte_eal/arm/include/rte_vect.h         |  2 +
 lib/librte_eal/common/eal_common_options.c    | 67 +++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h      |  8 +++
 lib/librte_eal/common/eal_options.h           |  2 +
 lib/librte_eal/include/rte_eal.h              | 33 +++++++++
 lib/librte_eal/ppc/include/rte_vect.h         |  2 +
 lib/librte_eal/rte_eal_version.map            |  4 ++
 lib/librte_eal/x86/include/rte_vect.h         |  2 +
 lib/librte_efd/rte_efd.c                      |  7 +-
 lib/librte_lpm/rte_lpm.h                      | 57 +++++++++++++---
 lib/librte_lpm/rte_lpm_altivec.h              |  2 +-
 lib/librte_lpm/rte_lpm_neon.h                 |  2 +-
 lib/librte_lpm/rte_lpm_sse.h                  |  2 +-
 lib/librte_member/rte_member_ht.c             |  3 +-
 lib/librte_net/rte_net_crc.c                  | 26 ++++---
 lib/librte_net/rte_net_crc.h                  |  3 +-
 31 files changed, 351 insertions(+), 55 deletions(-)
 create mode 100644 doc/guides/howto/avx512.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
@ 2020-09-30 13:03   ` Ciara Power
  2020-10-01 14:49     ` Coyle, David
                       ` (5 more replies)
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values Ciara Power
                     ` (16 subsequent siblings)
  17 siblings, 6 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:03 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Ray Kinsella, Neil Horman

This patch adds a max SIMD bitwidth EAL configuration. The API allows
for an app to set this value. It can also be set using EAL argument
--force-max-simd-bitwidth, which will lock the value and override any
modifications made by the app.

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v3:
  - Added enum value to essentially disable using max SIMD to choose
    paths, intended for use by ARM SVE.
  - Fixed parsing bitwidth argument to return an error for values
    greater than uint16_t.
v2: Added to Doxygen comment for API.
---
 lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
 lib/librte_eal/rte_eal_version.map         |  4 ++
 5 files changed, 111 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index a5426e1234..e9117a96af 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -102,6 +102,7 @@ eal_long_options[] = {
 	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
+	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
 	{0,                     0, NULL, 0                        }
 };
 
@@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
 	return 0;
 }
 
+static int
+eal_parse_simd_bitwidth(const char *arg, bool locked)
+{
+	char *end;
+	unsigned long bitwidth;
+	int ret;
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	bitwidth = strtoul(arg, &end, 0);
+
+	/* check for errors */
+	if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
+		return -1;
+
+	if (bitwidth == 0)
+		bitwidth = UINT16_MAX;
+	ret = rte_set_max_simd_bitwidth(bitwidth);
+	if (ret < 0)
+		return -1;
+	internal_conf->max_simd_bitwidth.locked = locked;
+	return 0;
+}
+
 static int
 eal_parse_base_virtaddr(const char *arg)
 {
@@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
 	case OPT_NO_TELEMETRY_NUM:
 		conf->no_telemetry = 1;
 		break;
+	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
+		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
+			return -1;
+		}
+		break;
 
 	/* don't know what to do, leave this to caller */
 	default:
@@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
 	return 0;
 }
 
+uint16_t
+rte_get_max_simd_bitwidth(void)
+{
+	const struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	return internal_conf->max_simd_bitwidth.bitwidth;
+}
+
+int
+rte_set_max_simd_bitwidth(uint16_t bitwidth)
+{
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	if (internal_conf->max_simd_bitwidth.locked) {
+		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
+		return -EPERM;
+	}
+
+	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
+			!rte_is_power_of_2(bitwidth))) {
+		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
+		return -EINVAL;
+	}
+	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
+	return 0;
+}
+
 void
 eal_common_usage(void)
 {
@@ -1981,6 +2044,7 @@ eal_common_usage(void)
 	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
 	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
 	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
+	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
 	       "\nEAL options for DEBUG use only:\n"
 	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
 	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 13f93388a7..367e0cc19e 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -33,6 +33,12 @@ struct hugepage_info {
 	int lock_descriptor;    /**< file descriptor for hugepage dir */
 };
 
+struct simd_bitwidth {
+	/**< flag indicating if bitwidth is locked from further modification */
+	bool locked;
+	uint16_t bitwidth; /**< bitwidth value */
+};
+
 /**
  * internal configuration
  */
@@ -85,6 +91,8 @@ struct internal_config {
 	volatile unsigned int init_complete;
 	/**< indicates whether EAL has completed initialization */
 	unsigned int no_telemetry; /**< true to disable Telemetry */
+	/** max simd bitwidth path to use */
+	struct simd_bitwidth max_simd_bitwidth;
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 89769d48b4..ef33979664 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -85,6 +85,8 @@ enum {
 	OPT_TELEMETRY_NUM,
 #define OPT_NO_TELEMETRY      "no-telemetry"
 	OPT_NO_TELEMETRY_NUM,
+#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
+	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index ddcf6a2e7a..fb739f3474 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -43,6 +43,14 @@ enum rte_proc_type_t {
 	RTE_PROC_INVALID
 };
 
+enum rte_max_simd_t {
+	RTE_NO_SIMD = 64,
+	RTE_MAX_128_SIMD = 128,
+	RTE_MAX_256_SIMD = 256,
+	RTE_MAX_512_SIMD = 512,
+	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
+};
+
 /**
  * Get the process type in a multi-process setup
  *
@@ -51,6 +59,31 @@ enum rte_proc_type_t {
  */
 enum rte_proc_type_t rte_eal_process_type(void);
 
+/**
+ * Get the supported SIMD bitwidth.
+ *
+ * @return
+ *   uint16_t bitwidth.
+ */
+__rte_experimental
+uint16_t rte_get_max_simd_bitwidth(void);
+
+/**
+ * Set the supported SIMD bitwidth.
+ * This API should only be called once at initialization, before EAL init.
+ *
+ * @param bitwidth
+ *   uint16_t bitwidth.
+ * @return
+ *   0 on success.
+ * @return
+ *   -EINVAL on invalid bitwidth parameter.
+ * @return
+ *   -EPERM if bitwidth is locked.
+ */
+__rte_experimental
+int rte_set_max_simd_bitwidth(uint16_t bitwidth);
+
 /**
  * Request iopl privilege for all RPL.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index c32461c663..17a7195a3d 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -397,6 +397,10 @@ EXPERIMENTAL {
 	rte_service_lcore_may_be_active;
 	rte_thread_register;
 	rte_thread_unregister;
+
+	# added in 20.11
+	rte_get_max_simd_bitwidth;
+	rte_set_max_simd_bitwidth;
 };
 
 INTERNAL {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
@ 2020-09-30 13:03   ` Ciara Power
  2020-10-05 19:35     ` David Christensen
                       ` (2 more replies)
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 03/18] doc: add detail on using max SIMD bitwidth Ciara Power
                     ` (15 subsequent siblings)
  17 siblings, 3 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:03 UTC (permalink / raw)
  To: dev
  Cc: Ciara Power, Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli,
	David Christensen, Jan Viktorin, Bruce Richardson,
	Konstantin Ananyev

Each arch has a define for the default SIMD bitwidth value; this is used
on EAL init to set the config max SIMD bitwidth.

Cc: Ruifeng Wang <ruifeng.wang@arm.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Cc: David Christensen <drc@linux.vnet.ibm.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v3:
  - Removed unnecessary define in generic rte_vect.h
  - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
v2: Changed default bitwidth for Arm to 128.
---
 lib/librte_eal/arm/include/rte_vect.h      | 2 ++
 lib/librte_eal/common/eal_common_options.c | 3 +++
 lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
 lib/librte_eal/x86/include/rte_vect.h      | 2 ++
 4 files changed, 9 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index 01c51712a1..a3508e69d5 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -14,6 +14,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH UINT16_MAX
+
 typedef int32x4_t xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index e9117a96af..d412cae89b 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -35,6 +35,7 @@
 #ifndef RTE_EXEC_ENV_WINDOWS
 #include <rte_telemetry.h>
 #endif
+#include <rte_vect.h>
 
 #include "eal_internal_cfg.h"
 #include "eal_options.h"
@@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
 	CPU_ZERO(&internal_cfg->ctrl_cpuset);
 	internal_cfg->init_complete = 0;
+	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
+	internal_cfg->max_simd_bitwidth.locked = 0;
 }
 
 static int
diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
index b0545c878c..70fbd0c423 100644
--- a/lib/librte_eal/ppc/include/rte_vect.h
+++ b/lib/librte_eal/ppc/include/rte_vect.h
@@ -15,6 +15,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef vector signed int xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
index df5a607623..b1df75aca7 100644
--- a/lib/librte_eal/x86/include/rte_vect.h
+++ b/lib/librte_eal/x86/include/rte_vect.h
@@ -35,6 +35,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH 256
+
 typedef __m128i xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 03/18] doc: add detail on using max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values Ciara Power
@ 2020-09-30 13:03   ` Ciara Power
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for " Ciara Power
                     ` (14 subsequent siblings)
  17 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:03 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Anatoly Burakov, John McNamara, Marko Kovacevic

This patch adds documentation on the usage of the max SIMD bitwidth EAL
setting, and how to use it to enable AVX-512 at runtime.

Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: John McNamara <john.mcnamara@intel.com>
Cc: Marko Kovacevic <marko.kovacevic@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v3:
  - Added enum value for disabling use of max SIMD to doc.
  - Added entry to HowTo index.
---
 doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
 doc/guides/howto/index.rst                    |  1 +
 doc/guides/linux_gsg/eal_args.include.rst     | 16 +++++++++
 .../prog_guide/env_abstraction_layer.rst      | 32 +++++++++++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 doc/guides/howto/avx512.rst

diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
new file mode 100644
index 0000000000..ebae0f2b4f
--- /dev/null
+++ b/doc/guides/howto/avx512.rst
@@ -0,0 +1,36 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2020 Intel Corporation.
+
+
+Using AVX-512 with DPDK
+=======================
+
+AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of EAL API,
+and by the user with a commandline argument. DPDK has a setting for max SIMD bitwidth,
+which can be modified and will then limit the vector path taken by the code.
+
+
+Using the API in apps
+---------------------
+
+Apps can request that DPDK use AVX-512 at runtime, if it provides improved application performance.
+This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
+which does not allow for AVX-512.
+
+.. code-block:: c
+
+   rte_set_max_simd_bitwidth(RTE_MAX_512_SIMD);
+
+This API should only be called once at initialization, before EAL init.
+For more information on the possible enum values to use as a parameter, see :ref:`max_simd_bitwidth`.
+
+
+Using the command-line argument
+---------------------------------------------
+
+The user can choose to use AVX-512 at runtime, using the following argument to set the max bitwidth::
+
+   ./app/dpdk-testpmd --force-max-simd-bitwidth=512
+
+This will override any further changes to the max SIMD bitwidth in DPDK,
+which is useful for testing purposes.
diff --git a/doc/guides/howto/index.rst b/doc/guides/howto/index.rst
index 5a97ea508c..c2a2c60ddb 100644
--- a/doc/guides/howto/index.rst
+++ b/doc/guides/howto/index.rst
@@ -20,3 +20,4 @@ HowTo Guides
     telemetry
     debug_troubleshoot
     openwrt
+    avx512
diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 0fe4457968..a0bfbd1a98 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -210,3 +210,19 @@ Other options
 *    ``--no-telemetry``:
 
     Disable telemetry.
+
+*    ``--force-max-simd-bitwidth=<val>``:
+
+    Specify the maximum SIMD bitwidth size to handle. This limits which vector paths,
+    if any, are taken, as any paths taken must use a bitwidth at or below the max bitwidth limit.
+    For example, to allow all SIMD bitwidths up to and including AVX-512::
+
+        --force-max-simd-bitwidth=512
+
+    The following example shows limiting the bitwidth to 64-bits to disable all vector code::
+
+        --force-max-simd-bitwidth=64
+
+    To disable the max SIMD bitwidth limit entirely::
+
+        --force-max-simd-bitwidth=0
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index f64ae953d1..58f591e921 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -486,6 +486,38 @@ the desired addressing mode when virtual devices that are not directly attached
 To facilitate forcing the IOVA mode to a specific value the EAL command line option ``--iova-mode`` can
 be used to select either physical addressing('pa') or virtual addressing('va').
 
+.. _max_simd_bitwidth:
+
+
+Max SIMD bitwidth
+~~~~~~~~~~~~~~~~~
+
+The EAL provides a single setting to limit the max SIMD bitwidth used by DPDK,
+which is used in determining the vector path, if any, chosen by a component.
+The value can be set at runtime by an application using the 'rte_set_max_simd_bitwidth(uint16_t bitwidth)' function,
+which should only be called once at initialization, before EAL init.
+The value can be overridden by the user using the EAL command-line option '--force-max-simd-bitwidth'.
+
+When choosing a vector path, along with checking the CPU feature support,
+the value of the max SIMD bitwidth must also be checked, and can be retrieved using the 'rte_get_max_simd_bitwidth()' function.
+The value should be compared against the enum values for accepted max SIMD bitwidths:
+
+.. code-block:: c
+
+   enum rte_max_simd_t {
+       RTE_NO_SIMD = 64,
+       RTE_MAX_128_SIMD = 128,
+       RTE_MAX_256_SIMD = 256,
+       RTE_MAX_512_SIMD = 512,
+       RTE_MAX_SIMD_DISABLE = UINT16_MAX,
+   };
+
+    if (rte_get_max_simd_bitwidth() >= RTE_MAX_512_SIMD)
+        /* Take AVX-512 vector path */
+    else if (rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
+        /* Take AVX2 vector path */
+
+
 Memory Segments and Memory Zones (memzone)
 ------------------------------------------
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (2 preceding siblings ...)
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 03/18] doc: add detail on using max SIMD bitwidth Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-08 15:21     ` Ananyev, Konstantin
  2020-10-09  3:02     ` Guo, Jia
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 05/18] net/axgbe: " Ciara Power
                     ` (13 subsequent siblings)
  17 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Beilei Xing, Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Beilei Xing <beilei.xing@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 60b33d20a1..9b535b52fa 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3098,7 +3098,8 @@ static eth_rx_burst_t
 i40e_get_latest_rx_vec(bool scatter)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 	}
 
-	if (ad->rx_vec_allowed) {
+	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
+			>= RTE_MAX_128_SIMD) {
 		/* Vec Rx path */
 		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
 				dev->data->port_id);
@@ -3268,7 +3271,8 @@ static eth_tx_burst_t
 i40e_get_latest_tx_vec(void)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 	}
 
 	if (ad->tx_simple_allowed) {
-		if (ad->tx_vec_allowed) {
+		if (ad->tx_vec_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
 			if (ad->use_latest_vec)
 				dev->tx_pkt_burst =
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 05/18] net/axgbe: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (3 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-09-30 13:29     ` Somalapuram, Amaranath
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 06/18] net/bnxt: " Ciara Power
                     ` (12 subsequent siblings)
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Somalapuram Amaranath

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index bc93becaa5..6093ec7279 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -557,7 +557,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!pdata->tx_queues)
 		pdata->tx_queues = dev->data->tx_queues;
 
-	if (txq->vector_disable)
+	if (txq->vector_disable || rte_get_max_simd_bitwidth()
+			< RTE_MAX_128_SIMD)
 		dev->tx_pkt_burst = &axgbe_xmit_pkts;
 	else
 #ifdef RTE_ARCH_X86
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 06/18] net/bnxt: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (4 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 05/18] net/axgbe: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 07/18] net/enic: " Ciara Power
                     ` (11 subsequent siblings)
  17 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Ajit Khaparde, Somnath Kotur

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/bnxt/bnxt_ethdev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 05e9a6abbf..5cd522f1fd 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1122,7 +1122,8 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
 		DEV_RX_OFFLOAD_RSS_HASH |
 		DEV_RX_OFFLOAD_VLAN_FILTER)) &&
-	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp) &&
+		rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		PMD_DRV_LOG(INFO, "Using vector mode receive for port %d\n",
 			    eth_dev->data->port_id);
 		bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
@@ -1154,7 +1155,8 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 	 */
 	if (!eth_dev->data->scattered_rx &&
 	    !eth_dev->data->dev_conf.txmode.offloads &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) &&
+	    rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		PMD_DRV_LOG(INFO, "Using vector mode transmit for port %d\n",
 			    eth_dev->data->port_id);
 		return bnxt_xmit_pkts_vec;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 07/18] net/enic: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (5 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 06/18] net/bnxt: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 08/18] net/fm10k: " Ciara Power
                     ` (10 subsequent siblings)
  17 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, John Daley, Hyong Youb Kim

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: John Daley <johndale@cisco.com>
Cc: Hyong Youb Kim <hyonkim@cisco.com>

Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/enic/enic_rxtx_vec_avx2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
index 676b9f5fdb..5db43bdbb8 100644
--- a/drivers/net/enic/enic_rxtx_vec_avx2.c
+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
@@ -821,7 +821,8 @@ enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
 	fconf = &eth_dev->data->dev_conf.fdir_conf;
 	if (fconf->mode != RTE_FDIR_MODE_NONE)
 		return false;
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD) {
 		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
 		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
 		enic->use_noscatter_vec_rx_handler = 1;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 08/18] net/fm10k: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (6 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 07/18] net/enic: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-09  0:18     ` Zhang, Qi Z
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 09/18] net/iavf: " Ciara Power
                     ` (9 subsequent siblings)
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Qi Zhang, Xiao Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qi Zhang <qi.z.zhang@intel.com>
Cc: Xiao Wang <xiao.w.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 5771d83b55..a8bc1036a3 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2930,7 +2930,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* primary process has set the ftag flag and offloads */
 		txq = dev->data->tx_queues[0];
-		if (fm10k_tx_vec_condition_check(txq)) {
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth()
+				< RTE_MAX_128_SIMD) {
 			dev->tx_pkt_burst = fm10k_xmit_pkts;
 			dev->tx_pkt_prepare = fm10k_prep_pkts;
 			PMD_INIT_LOG(DEBUG, "Use regular Tx func");
@@ -2949,7 +2951,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 		txq = dev->data->tx_queues[i];
 		txq->tx_ftag_en = tx_ftag_en;
 		/* Check if Vector Tx is satisfied */
-		if (fm10k_tx_vec_condition_check(txq))
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD)
 			use_sse = 0;
 	}
 
@@ -2983,7 +2986,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met.
 	 */
 	if (!fm10k_rx_vec_condition_check(dev) &&
-			dev_info->rx_vec_allowed && !rx_ftag_en) {
+			dev_info->rx_vec_allowed && !rx_ftag_en &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 		if (dev->data->scattered_rx)
 			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
 		else
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 09/18] net/iavf: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (7 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 08/18] net/fm10k: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 10/18] net/ice: " Ciara Power
                     ` (8 subsequent siblings)
  17 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Jingjing Wu, Beilei Xing

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Jingjing Wu <jingjing.wu@intel.com>
Cc: Beilei Xing <beilei.xing@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 05a7dd898a..b798d082a2 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2105,14 +2105,16 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_rx_vec_dev_check(dev)) {
+	if (!iavf_rx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
 			rxq = dev->data->rx_queues[i];
 			(void)iavf_rxq_vec_setup(rxq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 			use_avx2 = true;
 
 		if (dev->data->scattered_rx) {
@@ -2178,7 +2180,8 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_tx_vec_dev_check(dev)) {
+	if (!iavf_tx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD) {
 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
 			txq = dev->data->tx_queues[i];
 			if (!txq)
@@ -2186,8 +2189,9 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 			iavf_txq_vec_setup(txq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 			use_avx2 = true;
 
 		PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 10/18] net/ice: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (8 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 09/18] net/iavf: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-09  0:04     ` Zhang, Qi Z
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 11/18] net/ixgbe: " Ciara Power
                     ` (7 subsequent siblings)
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Qiming Yang, Qi Zhang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qiming Yang <qiming.yang@intel.com>
Cc: Qi Zhang <qi.z.zhang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index fef6ad4544..5a29af743c 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2936,7 +2936,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
+		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			ad->rx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
 				rxq = dev->data->rx_queues[i];
@@ -2946,8 +2948,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_MAX_256_SIMD)
 				use_avx2 = true;
 
 		} else {
@@ -3114,7 +3118,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_tx_vec_dev_check(dev)) {
+		if (!ice_tx_vec_dev_check(dev) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			ad->tx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_tx_queues; i++) {
 				txq = dev->data->tx_queues[i];
@@ -3124,8 +3130,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_MAX_256_SIMD)
 				use_avx2 = true;
 
 		} else {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (9 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 10/18] net/ice: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-08 15:05     ` Ananyev, Konstantin
  2020-10-10 13:13     ` Wang, Haiyue
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 12/18] net/mlx5: " Ciara Power
                     ` (6 subsequent siblings)
  17 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Wei Zhao, Jeff Guo, Haiyue Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Wei Zhao <wei.zhao1@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 977ecf5137..eadc7183f2 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 		dev->tx_pkt_prepare = NULL;
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
-					ixgbe_txq_vec_setup(txq) == 0)) {
+					ixgbe_txq_vec_setup(txq) == 0) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_MAX_128_SIMD) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
 			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
 		} else
@@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met and Rx Bulk Allocation should be allowed.
 	 */
 	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
-	    !adapter->rx_bulk_alloc_allowed) {
+	    !adapter->rx_bulk_alloc_allowed ||
+			rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
 		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
 				    "preconditions",
 			     dev->data->port_id);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 12/18] net/mlx5: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (10 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 11/18] net/ixgbe: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-05  6:30     ` Slava Ovsiienko
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 13/18] net/virtio: " Ciara Power
                     ` (5 subsequent siblings)
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Matan Azrad, Shahaf Shuler, Viacheslav Ovsiienko

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Matan Azrad <matan@mellanox.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>
Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v2: Moved the max bitwidth check into the existing vector support
    check function.
---
 drivers/net/mlx5/mlx5_rxtx_vec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 711dcd35fa..c384c737dc 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -148,6 +148,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	uint32_t i;
 
+	if (rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD)
+		return -ENOTSUP;
 	if (!priv->config.rx_vec_en)
 		return -ENOTSUP;
 	if (mlx5_mprq_enabled(dev))
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 13/18] net/virtio: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (11 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 12/18] net/mlx5: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 14/18] distributor: " Ciara Power
                     ` (4 subsequent siblings)
  17 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Maxime Coquelin, Chenbo Xia, Zhihong Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Chenbo Xia <chenbo.xia@intel.com>
Cc: Zhihong Wang <zhihong.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v3: Moved max SIMD bitwidth check to configure function with other vec
    support checks.
---
 drivers/net/virtio/virtio_ethdev.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 013a2904e6..f749e81405 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -2306,7 +2306,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
 		     !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
-		     !vtpci_with_feature(hw, VIRTIO_F_VERSION_1))) {
+		     !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
+		     rte_get_max_simd_bitwidth() < RTE_MAX_512_SIMD)) {
 			PMD_DRV_LOG(INFO,
 				"disabled packed ring vectorized path for requirements not met");
 			hw->use_vec_rx = 0;
@@ -2359,6 +2360,12 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 					"disabled split ring vectorized rx for offloading enabled");
 				hw->use_vec_rx = 0;
 			}
+
+			if (rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
+				PMD_DRV_LOG(INFO,
+					"disabled split ring vectorized rx, max SIMD bitwidth too low");
+				hw->use_vec_rx = 0;
+			}
 		}
 	}
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 14/18] distributor: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (12 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 13/18] net/virtio: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-06 12:17     ` David Hunt
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 15/18] member: " Ciara Power
                     ` (3 subsequent siblings)
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, David Hunt

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: David Hunt <david.hunt@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_distributor/rte_distributor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index 1c047f065a..9f0a9b1d48 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -636,7 +636,8 @@ rte_distributor_create(const char *name,
 
 	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
 #if defined(RTE_ARCH_X86)
-	d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
+	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
+		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
 #endif
 
 	/*
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 15/18] member: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (13 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 14/18] distributor: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-07  0:51     ` Wang, Yipeng1
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 16/18] efd: " Ciara Power
                     ` (2 subsequent siblings)
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Yipeng Wang, Sameh Gobriel

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU
enabled path.

Cc: Yipeng Wang <yipeng1.wang@intel.com>
Cc: Sameh Gobriel <sameh.gobriel@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_member/rte_member_ht.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
index cbcd0d4407..71e3cf7b52 100644
--- a/lib/librte_member/rte_member_ht.c
+++ b/lib/librte_member/rte_member_ht.c
@@ -113,7 +113,8 @@ rte_member_create_ht(struct rte_member_setsum *ss,
 	}
 #if defined(RTE_ARCH_X86)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
-			RTE_MEMBER_BUCKET_ENTRIES == 16)
+			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
 	else
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 16/18] efd: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (14 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 15/18] member: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-10-07  0:51     ` Wang, Yipeng1
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 17/18] net: " Ciara Power
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime Ciara Power
  17 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Byron Marohn, Yipeng Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Byron Marohn <byron.marohn@intel.com>
Cc: Yipeng Wang <yipeng1.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_efd/rte_efd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index 6a799556d4..509ecc8256 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -645,7 +645,9 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * For less than 4 bits, scalar function performs better
 	 * than vectorised version
 	 */
-	if (RTE_EFD_VALUE_NUM_BITS > 3 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (RTE_EFD_VALUE_NUM_BITS > 3
+			&& rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)
+			&& rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
 		table->lookup_fn = EFD_LOOKUP_AVX2;
 	else
 #endif
@@ -655,7 +657,8 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * than vectorised version
 	 */
 	if (RTE_EFD_VALUE_NUM_BITS > 16 &&
-	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
+			rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
 		table->lookup_fn = EFD_LOOKUP_NEON;
 	else
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (15 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 16/18] efd: " Ciara Power
@ 2020-09-30 13:04   ` " Ciara Power
  2020-09-30 15:03     ` Coyle, David
  2020-10-06  9:58     ` Olivier Matz
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime Ciara Power
  17 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev; +Cc: Ciara Power, Jasvinder Singh, Olivier Matz

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

The vector path was initially chosen in RTE_INIT, however this is no
longer suitable as we cannot check the max SIMD bitwidth at that time.
The default chosen in RTE_INIT is now scalar. For best performance
and to use vector paths, apps must explicitly call the set algorithm
function before using other functions from this library, as this is
where vector handlers are now chosen.

Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v3:
  - Moved choosing vector paths out of RTE_INIT.
  - Moved checking max_simd_bitwidth into the set_alg function.
---
 lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
 lib/librte_net/rte_net_crc.h |  3 ++-
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
index 9fd4794a9d..241eb16399 100644
--- a/lib/librte_net/rte_net_crc.c
+++ b/lib/librte_net/rte_net_crc.c
@@ -9,6 +9,7 @@
 #include <rte_cpuflags.h>
 #include <rte_common.h>
 #include <rte_net_crc.h>
+#include <rte_eal.h>
 
 #if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
 #define X86_64_SSE42_PCLMULQDQ     1
@@ -60,6 +61,9 @@ static rte_net_crc_handler handlers_neon[] = {
 };
 #endif
 
+static uint16_t max_simd_bitwidth;
+#define RTE_LOGTYPE_NET RTE_LOGTYPE_USER1
+
 /**
  * Reflect the bits about the middle
  *
@@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
 void
 rte_net_crc_set_alg(enum rte_net_crc_alg alg)
 {
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+
 	switch (alg) {
 #ifdef X86_64_SSE42_PCLMULQDQ
 	case RTE_NET_CRC_SSE42:
-		handlers = handlers_sse42;
-		break;
+		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
+			handlers = handlers_sse42;
+			return;
+		}
+		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using scalar\n");
 #elif defined ARM64_NEON_PMULL
 		/* fall-through */
 	case RTE_NET_CRC_NEON:
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+				max_simd_bitwidth >= RTE_MAX_128_SIMD) {
 			handlers = handlers_neon;
-			break;
+			return;
 		}
+		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low or CPU flag not enabled, using scalar\n");
 #endif
 		/* fall-through */
 	case RTE_NET_CRC_SCALAR:
@@ -184,19 +196,15 @@ rte_net_crc_calc(const void *data,
 /* Select highest available crc algorithm as default one */
 RTE_INIT(rte_net_crc_init)
 {
-	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
-
 	rte_net_crc_scalar_init();
 
 #ifdef X86_64_SSE42_PCLMULQDQ
-	alg = RTE_NET_CRC_SSE42;
 	rte_net_crc_sse42_init();
 #elif defined ARM64_NEON_PMULL
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
-		alg = RTE_NET_CRC_NEON;
 		rte_net_crc_neon_init();
 	}
 #endif
 
-	rte_net_crc_set_alg(alg);
+	rte_net_crc_set_alg(RTE_NET_CRC_SCALAR);
 }
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
index 16e85ca970..7a45ebe193 100644
--- a/lib/librte_net/rte_net_crc.h
+++ b/lib/librte_net/rte_net_crc.h
@@ -28,7 +28,8 @@ enum rte_net_crc_alg {
 /**
  * This API set the CRC computation algorithm (i.e. scalar version,
  * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
- * structure.
+ * structure. This should be called before any other functions, to
+ * choose the algorithm for best performance.
  *
  * @param alg
  *   This parameter is used to select the CRC implementation version.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
                     ` (16 preceding siblings ...)
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 17/18] net: " Ciara Power
@ 2020-09-30 13:04   ` Ciara Power
  2020-09-30 13:54     ` Medvedkin, Vladimir
  2020-10-08 15:19     ` David Marchand
  17 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-09-30 13:04 UTC (permalink / raw)
  To: dev
  Cc: Ciara Power, Bruce Richardson, Vladimir Medvedkin, Jerin Jacob,
	Ruifeng Wang

When choosing the vector path, max SIMD bitwidth is now checked to
ensure a vector path is allowable. To do this, rather than the vector
lookup functions being called directly from apps, a generic lookup
function is called which will call the vector functions if suitable.

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
 lib/librte_lpm/rte_lpm_altivec.h |  2 +-
 lib/librte_lpm/rte_lpm_neon.h    |  2 +-
 lib/librte_lpm/rte_lpm_sse.h     |  2 +-
 4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 03da2d37e0..edba7cafd5 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
 /* Mask four results. */
 #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
 
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#include "rte_lpm_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "rte_lpm_altivec.h"
+#else
+#include "rte_lpm_sse.h"
+#endif
+
 /**
- * Lookup four IP addresses in an LPM table.
+ * Lookup four IP addresses in an LPM table individually by calling the
+ * lookup function for each IP. This is used when lookupx4 is called but
+ * the vector path is not suitable.
  *
  * @param lpm
  *   LPM object handle
@@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
  *   if lookup would fail.
  */
 static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
-	uint32_t defv);
+rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	int i;
+	for (i = 0; i < 4; i++)
+		if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
+			hop[i] = defv; /* lookupx4 expected to set on failure */
+}
 
-#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
-#include "rte_lpm_neon.h"
-#elif defined(RTE_ARCH_PPC_64)
-#include "rte_lpm_altivec.h"
-#else
-#include "rte_lpm_sse.h"
-#endif
+/**
+ * Lookup four IP addresses in an LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   Four IPs to be looked up in the LPM table
+ * @param hop
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is a 4-element array of two byte values.
+ *   If the lookup was successful for the given IP, then least significant byte
+ *   of the corresponding element is the  actual next hop and the most
+ *   significant byte is zero.
+ *   If the lookup for the given IP failed, then corresponding element would
+ *   contain default value, see description of the next parameter.
+ * @param defv
+ *   Default value to populate into corresponding element of hop[] array,
+ *   if lookup would fail.
+ */
+static inline void
+rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
+		rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
+	else
+		rte_lpm_lookupx4_scalar(lpm, ip, hop, defv);
+}
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
index 228c41b38e..82142d3351 100644
--- a/lib/librte_lpm/rte_lpm_altivec.h
+++ b/lib/librte_lpm/rte_lpm_altivec.h
@@ -16,7 +16,7 @@ extern "C" {
 #endif
 
 static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv)
 {
 	vector signed int i24;
diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
index 6c131d3125..14b184515d 100644
--- a/lib/librte_lpm/rte_lpm_neon.h
+++ b/lib/librte_lpm/rte_lpm_neon.h
@@ -16,7 +16,7 @@ extern "C" {
 #endif
 
 static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv)
 {
 	uint32x4_t i24;
diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
index 44770b6ff8..cb5477c6cf 100644
--- a/lib/librte_lpm/rte_lpm_sse.h
+++ b/lib/librte_lpm/rte_lpm_sse.h
@@ -15,7 +15,7 @@ extern "C" {
 #endif
 
 static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv)
 {
 	__m128i i24;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 05/18] net/axgbe: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 05/18] net/axgbe: " Ciara Power
@ 2020-09-30 13:29     ` Somalapuram, Amaranath
  0 siblings, 0 replies; 276+ messages in thread
From: Somalapuram, Amaranath @ 2020-09-30 13:29 UTC (permalink / raw)
  To: Ciara Power, dev

-----Original Message-----
From: Ciara Power <ciara.power@intel.com> 
Sent: Wednesday, September 30, 2020 6:34 PM
To: dev@dpdk.org
Cc: Ciara Power <ciara.power@intel.com>; Somalapuram, Amaranath <Amaranath.Somalapuram@amd.com>
Subject: [PATCH v3 05/18] net/axgbe: add checks for max SIMD bitwidth

[CAUTION: External Email]

When choosing a vector path to take, an extra condition must be satisfied to ensure the max SIMD bitwidth allows for the CPU enabled path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c index bc93becaa5..6093ec7279 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -557,7 +557,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        if (!pdata->tx_queues)
                pdata->tx_queues = dev->data->tx_queues;

-       if (txq->vector_disable)
+       if (txq->vector_disable || rte_get_max_simd_bitwidth()
+                       < RTE_MAX_128_SIMD)
                dev->tx_pkt_burst = &axgbe_xmit_pkts;
        else
 #ifdef RTE_ARCH_X86
--
2.17.1

Acked-by: Amaranath Somalapuram <asomalap@amd.com>

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime Ciara Power
@ 2020-09-30 13:54     ` Medvedkin, Vladimir
  2020-10-08 14:40       ` Ananyev, Konstantin
  2020-10-08 15:19     ` David Marchand
  1 sibling, 1 reply; 276+ messages in thread
From: Medvedkin, Vladimir @ 2020-09-30 13:54 UTC (permalink / raw)
  To: Ciara Power, dev; +Cc: Bruce Richardson, Jerin Jacob, Ruifeng Wang

Hi Ciara,


On 30/09/2020 14:04, Ciara Power wrote:
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure a vector path is allowable. To do this, rather than the vector
> lookup functions being called directly from apps, a generic lookup
> function is called which will call the vector functions if suitable.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>   lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
>   lib/librte_lpm/rte_lpm_altivec.h |  2 +-
>   lib/librte_lpm/rte_lpm_neon.h    |  2 +-
>   lib/librte_lpm/rte_lpm_sse.h     |  2 +-
>   4 files changed, 50 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 03da2d37e0..edba7cafd5 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
>   /* Mask four results. */
>   #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
>   
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +#include "rte_lpm_neon.h"
> +#elif defined(RTE_ARCH_PPC_64)
> +#include "rte_lpm_altivec.h"
> +#else
> +#include "rte_lpm_sse.h"
> +#endif
> +
>   /**
> - * Lookup four IP addresses in an LPM table.
> + * Lookup four IP addresses in an LPM table individually by calling the
> + * lookup function for each ip. This is used when lookupx4 is called but
> + * the vector path is not suitable.
>    *
>    * @param lpm
>    *   LPM object handle
> @@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
>    *   if lookup would fail.
>    */
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> -	uint32_t defv);
> +rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +	uint32_t defv)
> +{
> +	int i;
> +	for (i = 0; i < 4; i++)
> +		if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
> +			hop[i] = defv; /* lookupx4 expected to set on failure */
> +}
>   
> -#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> -#include "rte_lpm_neon.h"
> -#elif defined(RTE_ARCH_PPC_64)
> -#include "rte_lpm_altivec.h"
> -#else
> -#include "rte_lpm_sse.h"
> -#endif
> +/**
> + * Lookup four IP addresses in an LPM table.
> + *
> + * @param lpm
> + *   LPM object handle
> + * @param ip
> + *   Four IPs to be looked up in the LPM table
> + * @param hop
> + *   Next hop of the most specific rule found for IP (valid on lookup hit only).
> + *   This is an 4 elements array of two byte values.
> + *   If the lookup was successful for the given IP, then least significant byte
> + *   of the corresponding element is the  actual next hop and the most
> + *   significant byte is zero.
> + *   If the lookup for the given IP failed, then corresponding element would
> + *   contain default value, see description of then next parameter.
> + * @param defv
> + *   Default value to populate into corresponding element of hop[] array,
> + *   if lookup would fail.
> + */
> +static inline void
> +rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +	uint32_t defv)
> +{
> +	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> +		rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
> +	else
> +		rte_lpm_lookupx4_scalar(lpm, ip, hop, defv);
> +}

I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4 is 
used in the hot path, and its bulk size (four lookups per call) is too 
small to amortize the cost of the extra runtime check.

>   
>   #ifdef __cplusplus
>   }
> diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
> index 228c41b38e..82142d3351 100644
> --- a/lib/librte_lpm/rte_lpm_altivec.h
> +++ b/lib/librte_lpm/rte_lpm_altivec.h
> @@ -16,7 +16,7 @@ extern "C" {
>   #endif
>   
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv)
>   {
>   	vector signed int i24;
> diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
> index 6c131d3125..14b184515d 100644
> --- a/lib/librte_lpm/rte_lpm_neon.h
> +++ b/lib/librte_lpm/rte_lpm_neon.h
> @@ -16,7 +16,7 @@ extern "C" {
>   #endif
>   
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv)
>   {
>   	uint32x4_t i24;
> diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
> index 44770b6ff8..cb5477c6cf 100644
> --- a/lib/librte_lpm/rte_lpm_sse.h
> +++ b/lib/librte_lpm/rte_lpm_sse.h
> @@ -15,7 +15,7 @@ extern "C" {
>   #endif
>   
>   static inline void
> -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv)
>   {
>   	__m128i i24;
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 17/18] net: " Ciara Power
@ 2020-09-30 15:03     ` Coyle, David
  2020-09-30 15:49       ` Singh, Jasvinder
  2020-10-06  9:58     ` Olivier Matz
  1 sibling, 1 reply; 276+ messages in thread
From: Coyle, David @ 2020-09-30 15:03 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: Power, Ciara, Singh, Jasvinder, Olivier Matz, O'loingsigh,
	Mairtin, Ryan, Brendan, Richardson, Bruce

Hi Ciara,

> From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> The vector path was initially chosen in RTE_INIT, however this is no longer
> suitable as we cannot check the max SIMD bitwidth at that time.
> The default chosen in RTE_INIT is now scalar. For best performance and to
> use vector paths, apps must explicitly call the set algorithm function before
> using other functions from this library, as this is where vector handlers are
> now chosen.

[DC] Has it been decided that it is ok to now require applications to pick the
CRC algorithm they want to use?

An application which previously automatically got SSE4.2 CRC, for example, will
now automatically only get scalar.

If this is ok, this should probably be called out explicitly in the release notes, as it may
not be immediately noticeable to users that they now need to select the CRC algo.

Actually, in general, the release notes need to be updated for this patchset.

> 
> Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>   - Moved choosing vector paths out of RTE_INIT.
>   - Moved checking max_simd_bitwidth into the set_alg function.
> ---
>  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
> lib/librte_net/rte_net_crc.h |  3 ++-
>  2 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index
> 9fd4794a9d..241eb16399 100644
> --- a/lib/librte_net/rte_net_crc.c
> +++ b/lib/librte_net/rte_net_crc.c

<snip>

> @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data,
> uint32_t data_len)  void  rte_net_crc_set_alg(enum rte_net_crc_alg alg)  {
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
>  	switch (alg) {
>  #ifdef X86_64_SSE42_PCLMULQDQ
>  	case RTE_NET_CRC_SSE42:
> -		handlers = handlers_sse42;
> -		break;
> +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> +			handlers = handlers_sse42;
> +			return;
> +		}
> +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using
> scalar\n");

[DC] Not sure if you're aware but there is another patchset which adds an AVX512 CRC
implementation and run-time checking of cpuflags to select the CRC path to use:
https://patchwork.dpdk.org/project/dpdk/list/?series=12596

If both patchsets are accepted, they will need to be reconciled with each other. That looks
fairly straightforward to me, but it would be good if you took a look too.


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-09-30 15:03     ` Coyle, David
@ 2020-09-30 15:49       ` Singh, Jasvinder
  2020-10-01 14:16         ` Coyle, David
  0 siblings, 1 reply; 276+ messages in thread
From: Singh, Jasvinder @ 2020-09-30 15:49 UTC (permalink / raw)
  To: Coyle, David, Power, Ciara, dev
  Cc: Power, Ciara, Olivier Matz, O'loingsigh, Mairtin, Ryan,
	Brendan, Richardson, Bruce



> -----Original Message-----
> From: Coyle, David <david.coyle@intel.com>
> Sent: Wednesday, September 30, 2020 4:04 PM
> To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Singh, Jasvinder
> <jasvinder.singh@intel.com>; Olivier Matz <olivier.matz@6wind.com>;
> O'loingsigh, Mairtin <mairtin.oloingsigh@intel.com>; Ryan, Brendan
> <brendan.ryan@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD
> bitwidth
> 
> Hi Ciara,
> 
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power When
> > choosing a vector path to take, an extra condition must be satisfied
> > to ensure the max SIMD bitwidth allows for the CPU enabled path.
> >
> > The vector path was initially chosen in RTE_INIT, however this is no
> > longer suitable as we cannot check the max SIMD bitwidth at that time.
> > The default chosen in RTE_INIT is now scalar. For best performance and
> > to use vector paths, apps must explicitly call the set algorithm
> > function before using other functions from this library, as this is
> > where vector handlers are now chosen.
> 
> [DC] Has it been decided that it is ok to now require applications to pick the
> CRC algorithm they want to use?
> 
> An application which previously automatically got SSE4.2 CRC, for example,
> will now automatically only get scalar.
> 
> If this is ok, this should probably be called out explicitly in release notes as it
> may not be Immediately noticeable to users that they now need to select the
> CRC algo.
> 
> Actually, in general, the release notes need to be updated for this patchset.

The decision to move the rte_net_crc_set_alg() call out of RTE_INIT was taken to avoid checking max_simd_bitwidth in the data path every time the crc_calc() API is invoked. Based on my understanding, max_simd_bitwidth is set after EAL init, so if it were checked in crc_calc(), it might override the default CRC algorithm set during RTE_INIT. Therefore, to avoid an extra check on max_simd_bitwidth in the data path, the better option is to read this static configuration once, after EAL init, in the set_alg API.

 
> >
> > Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> >
> > ---
> > v3:
> >   - Moved choosing vector paths out of RTE_INIT.
> >   - Moved checking max_simd_bitwidth into the set_alg function.
> > ---
> >  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
> > lib/librte_net/rte_net_crc.h |  3 ++-
> >  2 files changed, 19 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_net/rte_net_crc.c
> > b/lib/librte_net/rte_net_crc.c index
> > 9fd4794a9d..241eb16399 100644
> > --- a/lib/librte_net/rte_net_crc.c
> > +++ b/lib/librte_net/rte_net_crc.c
> 
> <snip>
> 
> > @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data,
> > uint32_t data_len)  void  rte_net_crc_set_alg(enum rte_net_crc_alg
> > alg)  {
> > +	if (max_simd_bitwidth == 0)
> > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> > +
> >  	switch (alg) {
> >  #ifdef X86_64_SSE42_PCLMULQDQ
> >  	case RTE_NET_CRC_SSE42:
> > -		handlers = handlers_sse42;
> > -		break;
> > +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> > +			handlers = handlers_sse42;
> > +			return;
> > +		}
> > +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using
> > scalar\n");
> 
> [DC] Not sure if you're aware but there is another patchset which adds an
> AVX512 CRC implementation and run-time checking of cpuflags to select the
> CRC path to use:
> https://patchwork.dpdk.org/project/dpdk/list/?series=12596
> 
> There will be a task to merge these 2 patchsets if both are merged. It looks
> fairly straightforward to me to merge these, but it would be good if you take
> a look too


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-09-30 15:49       ` Singh, Jasvinder
@ 2020-10-01 14:16         ` Coyle, David
  2020-10-01 14:19           ` Power, Ciara
  0 siblings, 1 reply; 276+ messages in thread
From: Coyle, David @ 2020-10-01 14:16 UTC (permalink / raw)
  To: Singh, Jasvinder, Power, Ciara, dev
  Cc: Power, Ciara, Olivier Matz, O'loingsigh, Mairtin, Ryan,
	Brendan, Richardson, Bruce

Hi Jasvinder/Ciara

> -----Original Message-----
> From: Singh, Jasvinder <jasvinder.singh@intel.com>
> >
> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power When
> > > choosing a vector path to take, an extra condition must be satisfied
> > > to ensure the max SIMD bitwidth allows for the CPU enabled path.
> > >
> > > The vector path was initially chosen in RTE_INIT, however this is no
> > > longer suitable as we cannot check the max SIMD bitwidth at that time.
> > > The default chosen in RTE_INIT is now scalar. For best performance
> > > and to use vector paths, apps must explicitly call the set algorithm
> > > function before using other functions from this library, as this is
> > > where vector handlers are now chosen.
> >
> > [DC] Has it been decided that it is ok to now require applications to
> > pick the CRC algorithm they want to use?
> >
> > An application which previously automatically got SSE4.2 CRC, for
> > example, will now automatically only get scalar.
> >
> > If this is ok, this should probably be called out explicitly in
> > release notes as it may not be Immediately noticeable to users that
> > they now need to select the CRC algo.
> >
> > Actually, in general, the release notes need to be updated for this
> patchset.
> 
> The decision to move rte_set_alg() out of RTE_INIT was taken to avoid check
> on max_simd_bitwidth in data path for every single time when crc_calc() api
> is invoked. Based on my understanding, max_simd_bitwidth is set after eal
> init, and when used in crc_calc(), it might override the default crc algo set
> during RTE_INIT. Therefore, to avoid extra check on max_simd_bitwidth in
> data path,  better option will be to use this static configuration one time after
> eal init in the set_algo API.

[DC] Yes that is a good change to have made to avoid extra datapath checks.

Based on off-list discussion, I now also understand the reason for defaulting
to scalar CRC in RTE_INIT. If a higher bitwidth CRC was chosen by RTE_INIT (e.g.
SSE4.2 CRC) but max_simd_bitwidth was then set to RTE_NO_SIMD (64) through
the EAL parameter or a call to rte_set_max_simd_bitwidth(), there would be a mismatch
unless rte_net_crc_set_alg() was then called to reconfigure the CRC. Defaulting to scalar
avoids this mismatch and works on all archs.

As I mentioned before, I think this needs to be called out in release notes, as it's an
under-the-hood change which could cause app performance to drop if app developers
aren't aware of it - the API itself hasn't changed, so they may not read the doxygen :)

> 
> 
> > >
> > > Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> > >
> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > >
> > > ---
> > > v3:
> > >   - Moved choosing vector paths out of RTE_INIT.
> > >   - Moved checking max_simd_bitwidth into the set_alg function.
> > > ---
> > >  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
> > > lib/librte_net/rte_net_crc.h |  3 ++-
> > >  2 files changed, 19 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/lib/librte_net/rte_net_crc.c
> > > b/lib/librte_net/rte_net_crc.c index
> > > 9fd4794a9d..241eb16399 100644
> > > --- a/lib/librte_net/rte_net_crc.c
> > > +++ b/lib/librte_net/rte_net_crc.c
> >
> > <snip>
> >
> > > @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data,
> > > uint32_t data_len)  void  rte_net_crc_set_alg(enum rte_net_crc_alg
> > > alg)  {
> > > +	if (max_simd_bitwidth == 0)
> > > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> > > +
> > >  	switch (alg) {
> > >  #ifdef X86_64_SSE42_PCLMULQDQ
> > >  	case RTE_NET_CRC_SSE42:
> > > -		handlers = handlers_sse42;
> > > -		break;
> > > +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> > > +			handlers = handlers_sse42;
> > > +			return;
> > > +		}
> > > +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using
> > > scalar\n");
> >
> > [DC] Not sure if you're aware but there is another patchset which adds
> > an
> > AVX512 CRC implementation and run-time checking of cpuflags to select
> > the CRC path to use:
> > https://patchwork.dpdk.org/project/dpdk/list/?series=12596
> >
> > There will be a task to merge these 2 patchsets if both are merged. It
> > looks fairly straightforward to me to merge these, but it would be
> > good if you take a look too


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-10-01 14:16         ` Coyle, David
@ 2020-10-01 14:19           ` Power, Ciara
  2020-10-06 10:00             ` Olivier Matz
  0 siblings, 1 reply; 276+ messages in thread
From: Power, Ciara @ 2020-10-01 14:19 UTC (permalink / raw)
  To: Coyle, David, Singh, Jasvinder, dev
  Cc: Olivier Matz, O'loingsigh, Mairtin, Ryan, Brendan, Richardson, Bruce

Hi David,

Thanks for reviewing, 

>-----Original Message-----
>From: Coyle, David <david.coyle@intel.com>
>Sent: Thursday 1 October 2020 15:17
>To: Singh, Jasvinder <jasvinder.singh@intel.com>; Power, Ciara
><ciara.power@intel.com>; dev@dpdk.org
>Cc: Power, Ciara <ciara.power@intel.com>; Olivier Matz
><olivier.matz@6wind.com>; O'loingsigh, Mairtin
><mairtin.oloingsigh@intel.com>; Ryan, Brendan <brendan.ryan@intel.com>;
>Richardson, Bruce <bruce.richardson@intel.com>
>Subject: RE: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD
>bitwidth
>
>Hi Jasvinder/Ciara
>
>> -----Original Message-----
>> From: Singh, Jasvinder <jasvinder.singh@intel.com>
>> >
>> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power When
>> > > choosing a vector path to take, an extra condition must be
>> > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
>path.
>> > >
>> > > The vector path was initially chosen in RTE_INIT, however this is
>> > > no longer suitable as we cannot check the max SIMD bitwidth at that
>time.
>> > > The default chosen in RTE_INIT is now scalar. For best performance
>> > > and to use vector paths, apps must explicitly call the set
>> > > algorithm function before using other functions from this library,
>> > > as this is where vector handlers are now chosen.
>> >
>> > [DC] Has it been decided that it is ok to now require applications
>> > to pick the CRC algorithm they want to use?
>> >
>> > An application which previously automatically got SSE4.2 CRC, for
>> > example, will now automatically only get scalar.
>> >
>> > If this is ok, this should probably be called out explicitly in
>> > release notes as it may not be Immediately noticeable to users that
>> > they now need to select the CRC algo.
>> >
>> > Actually, in general, the release notes need to be updated for this
>> patchset.
>>
>> The decision to move rte_set_alg() out of RTE_INIT was taken to avoid
>> check on max_simd_bitwidth in data path for every single time when
>> crc_calc() api is invoked. Based on my understanding,
>> max_simd_bitwidth is set after eal init, and when used in crc_calc(),
>> it might override the default crc algo set during RTE_INIT. Therefore,
>> to avoid extra check on max_simd_bitwidth in data path,  better option
>> will be to use this static configuration one time after eal init in the set_algo
>API.
>
>[DC] Yes that is a good change to have made to avoid extra datapath checks.
>
>Based on off-list discussion, I now also know the reason behind now
>defaulting to scalar CRC in RTE_INIT. If a higher bitwidth CRC was chosen by
>RTE_INIT (e.g.
>SSE4.2 CRC) but the max_simd_bitwidth was then set to RTE_NO_SIMD (64)
>through the EAL parameter or call to rte_set_max_simd_bitwidth(), then
>there is a mismatch if rte_net_crc_set_alg() is not then called to reconfigure
>the CRC. Defaulting to scalar avoids this mismatch and works on all archs
>
>As I mentioned before, I think this needs to be called out in release notes, as
>it's an under-the-hood change which could cause app performance to drop if
>app developers aren't aware of it - the API itself hasn't changed, so they may
>not read the doxygen :)
>

Yes, that is a good point. I can add a note to the release notes to call this out.

>>
>>
>> > >
>> > > Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
>> > >
>> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
>> > >
>> > > ---
>> > > v3:
>> > >   - Moved choosing vector paths out of RTE_INIT.
>> > >   - Moved checking max_simd_bitwidth into the set_alg function.
>> > > ---
>> > >  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
>> > > lib/librte_net/rte_net_crc.h |  3 ++-
>> > >  2 files changed, 19 insertions(+), 10 deletions(-)
>> > >
>> > > diff --git a/lib/librte_net/rte_net_crc.c
>> > > b/lib/librte_net/rte_net_crc.c index
>> > > 9fd4794a9d..241eb16399 100644
>> > > --- a/lib/librte_net/rte_net_crc.c
>> > > +++ b/lib/librte_net/rte_net_crc.c
>> >
>> > <snip>
>> >
>> > > @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data,
>> > > uint32_t data_len)  void  rte_net_crc_set_alg(enum rte_net_crc_alg
>> > > alg)  {
>> > > +	if (max_simd_bitwidth == 0)
>> > > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
>> > > +
>> > >  	switch (alg) {
>> > >  #ifdef X86_64_SSE42_PCLMULQDQ
>> > >  	case RTE_NET_CRC_SSE42:
>> > > -		handlers = handlers_sse42;
>> > > -		break;
>> > > +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
>> > > +			handlers = handlers_sse42;
>> > > +			return;
>> > > +		}
>> > > +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using
>> > > scalar\n");
>> >
>> > [DC] Not sure if you're aware but there is another patchset which
>> > adds an
>> > AVX512 CRC implementation and run-time checking of cpuflags to
>> > select the CRC path to use:
>> > https://patchwork.dpdk.org/project/dpdk/list/?series=12596
>> >
>> > There will be a task to merge these 2 patchsets if both are merged.
>> > It looks fairly straightforward to me to merge these, but it would
>> > be good if you take a look too

I have looked at that patchset, I agree, I think they will be straightforward to merge together.

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
@ 2020-10-01 14:49     ` Coyle, David
  2020-10-06  9:32     ` Olivier Matz
                       ` (4 subsequent siblings)
  5 siblings, 0 replies; 276+ messages in thread
From: Coyle, David @ 2020-10-01 14:49 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Power, Ciara, Ray Kinsella, Neil Horman

Hi Ciara

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power

<snip>

> diff --git a/lib/librte_eal/common/eal_internal_cfg.h
> b/lib/librte_eal/common/eal_internal_cfg.h
> index 13f93388a7..367e0cc19e 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -33,6 +33,12 @@ struct hugepage_info {
>  	int lock_descriptor;    /**< file descriptor for hugepage dir */
>  };
> 
> +struct simd_bitwidth {
> +	/**< flag indicating if bitwidth is locked from further modification */
> +	bool locked;
> +	uint16_t bitwidth; /**< bitwidth value */ };

[DC] The doxygen comment on 'locked' flag uses '/**<' so should come after the field.
Having the comment after the field seems to be the way it's done in this file so I'd move
the comment as opposed to removing the '<'

> +
>  /**
>   * internal configuration
>   */
> @@ -85,6 +91,8 @@ struct internal_config {
>  	volatile unsigned int init_complete;
>  	/**< indicates whether EAL has completed initialization */
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
> +	/** max simd bitwidth path to use */
> +	struct simd_bitwidth max_simd_bitwidth;

[DC] Again the doxygen comments seem to come after the struct fields in this file
so I'd move the comment for max_simd_bitwidth to after it and add the '<'

>  };
> 
>  void eal_reset_internal_config(struct internal_config *internal_cfg); diff --git

<snip>

> 
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..fb739f3474 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>  	RTE_PROC_INVALID
>  };
> 
> +enum rte_max_simd_t {
> +	RTE_NO_SIMD = 64,
> +	RTE_MAX_128_SIMD = 128,
> +	RTE_MAX_256_SIMD = 256,
> +	RTE_MAX_512_SIMD = 512,
> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> +};

[DC] Add doxygen comments on enum rte_max_simd_t and each of its values

> +
>  /**
>   * Get the process type in a multi-process setup
>   *
> @@ -51,6 +59,31 @@ enum rte_proc_type_t {
>   */
>  enum rte_proc_type_t rte_eal_process_type(void);
> 
> +/**
> + * Get the supported SIMD bitwidth.
> + *
> + * @return
> + *   uint16_t bitwidth.
> + */
> +__rte_experimental
> +uint16_t rte_get_max_simd_bitwidth(void);
> +
> +/**
> + * Set the supported SIMD bitwidth.
> + * This API should only be called once at initialization, before EAL init.
> + *
> + * @param bitwidth
> + *   uint16_t bitwidth.
> + * @return
> + *   0 on success.
> + * @return
> + *   -EINVAL on invalid bitwidth parameter.
> + * @return
> + *   -EPERM if bitwidth is locked.

[DC] Minor thing.. normally there's just 1 @return tag with all of the return values under
it as a bullet list

> + */
> +__rte_experimental
> +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> +
>  /**
>   * Request iopl privilege for all RPL.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map
> b/lib/librte_eal/rte_eal_version.map
> index c32461c663..17a7195a3d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -397,6 +397,10 @@ EXPERIMENTAL {
>  	rte_service_lcore_may_be_active;
>  	rte_thread_register;
>  	rte_thread_unregister;
> +
> +	# added in 20.11
> +	rte_get_max_simd_bitwidth;
> +	rte_set_max_simd_bitwidth;
>  };

[DC] rte_get_max_simd_bitwidth is called from rte_net_crc (and other libraries) so this
symbol possibly needs to be added to librte_eal/rte_eal_exports.def file too.

This is the windows symbol export file, used on windows build.

This has caught us out on the AVX512 CRC patchset https://patchwork.dpdk.org/project/dpdk/list/?series=12596
where a windows build failed in the 'ci/iol-testing' checks in patchwork because
rte_net_crc couldn't find the symbol rte_cpu_get_flag_enabled, which also comes
from rte_eal. We have to add this symbol to rte_eal_exports.def to fix this.

The 'ci/iol-testing' check has not run for your patchset so I can't say for certain if the
Windows build would have failed for you, but I think it would.

> 
>  INTERNAL {
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 12/18] net/mlx5: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 12/18] net/mlx5: " Ciara Power
@ 2020-10-05  6:30     ` Slava Ovsiienko
  0 siblings, 0 replies; 276+ messages in thread
From: Slava Ovsiienko @ 2020-10-05  6:30 UTC (permalink / raw)
  To: Ciara Power, dev; +Cc: Matan Azrad, Shahaf Shuler, Viacheslav Ovsiienko

> -----Original Message-----
> From: Ciara Power <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 16:04
> To: dev@dpdk.org
> Cc: Ciara Power <ciara.power@intel.com>; Matan Azrad
> <matan@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>;
> Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Subject: [PATCH v3 12/18] net/mlx5: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Matan Azrad <matan@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>
> Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Looks good to me, 
thank you, Ciara.

With best regards, Slava

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values Ciara Power
@ 2020-10-05 19:35     ` David Christensen
  2020-10-08 13:17     ` Ananyev, Konstantin
  2020-10-08 16:45     ` David Marchand
  2 siblings, 0 replies; 276+ messages in thread
From: David Christensen @ 2020-10-05 19:35 UTC (permalink / raw)
  To: Ciara Power, dev
  Cc: Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli, Jan Viktorin,
	Bruce Richardson, Konstantin Ananyev



On 9/30/20 6:03 AM, Ciara Power wrote:
> Each arch has a define for the default SIMD bitwidth value, this is used
> on EAL init to set the config max SIMD bitwidth.
> 
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>    - Removed unnecessary define in generic rte_vect.h
>    - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
> v2: Changed default bitwidth for Arm to 128.
> ---
>   lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>   lib/librte_eal/common/eal_common_options.c | 3 +++
>   lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>   lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>   4 files changed, 9 insertions(+)
> 
Reviewed-By: David Christensen <drc@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
  2020-10-01 14:49     ` Coyle, David
@ 2020-10-06  9:32     ` Olivier Matz
  2020-10-07 10:47       ` Power, Ciara
  2020-10-06 11:50     ` Maxime Coquelin
                       ` (3 subsequent siblings)
  5 siblings, 1 reply; 276+ messages in thread
From: Olivier Matz @ 2020-10-06  9:32 UTC (permalink / raw)
  To: Ciara Power; +Cc: dev, Ray Kinsella, Neil Horman

Hi Ciara,

Please find some comments below.

On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> for an app to set this value. It can also be set using EAL argument
> --force-max-simd-bitwidth, which will lock the value and override any
> modifications made by the app.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>   - Added enum value to essentially disable using max SIMD to choose
>     paths, intended for use by ARM SVE.
>   - Fixed parsing bitwidth argument to return an error for values
>     greater than uint16_t.
> v2: Added to Doxygen comment for API.
> ---
>  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  5 files changed, 111 insertions(+)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index a5426e1234..e9117a96af 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -102,6 +102,7 @@ eal_long_options[] = {
>  	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>  	{0,                     0, NULL, 0                        }
>  };
>  
> @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
>  	return 0;
>  }
>  
> +static int
> +eal_parse_simd_bitwidth(const char *arg, bool locked)
> +{
> +	char *end;
> +	unsigned long bitwidth;
> +	int ret;
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +
> +	if (arg == NULL || arg[0] == '\0')
> +		return -1;
> +
> +	errno = 0;
> +	bitwidth = strtoul(arg, &end, 0);
> +
> +	/* check for errors */
> +	if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
> +		return -1;
> +
> +	if (bitwidth == 0)
> +		bitwidth = UINT16_MAX;
> +	ret = rte_set_max_simd_bitwidth(bitwidth);
> +	if (ret < 0)
> +		return -1;
> +	internal_conf->max_simd_bitwidth.locked = locked;
> +	return 0;
> +}
> +
>  static int
>  eal_parse_base_virtaddr(const char *arg)
>  {
> @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
>  	case OPT_NO_TELEMETRY_NUM:
>  		conf->no_telemetry = 1;
>  		break;
> +	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> +		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> +			return -1;
> +		}
> +		break;
>  
>  	/* don't know what to do, leave this to caller */
>  	default:
> @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
>  	return 0;
>  }
>  
> +uint16_t
> +rte_get_max_simd_bitwidth(void)
> +{
> +	const struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	return internal_conf->max_simd_bitwidth.bitwidth;
> +}

Should the return value be enum rte_max_simd_t?
If not, do we really need the enum definition?

> +
> +int
> +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> +{
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	if (internal_conf->max_simd_bitwidth.locked) {
> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> +		return -EPERM;
> +	}
> +
> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> +			!rte_is_power_of_2(bitwidth))) {
> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> +		return -EINVAL;
> +	}
> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> +	return 0;
> +}

Same question, should the parameter be enum rte_max_simd_t?

> +
>  void
>  eal_common_usage(void)
>  {
> @@ -1981,6 +2044,7 @@ eal_common_usage(void)
>  	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
>  	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
>  	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> +	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
>  	       "\nEAL options for DEBUG use only:\n"
>  	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
>  	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 13f93388a7..367e0cc19e 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -33,6 +33,12 @@ struct hugepage_info {
>  	int lock_descriptor;    /**< file descriptor for hugepage dir */
>  };
>  
> +struct simd_bitwidth {
> +	/**< flag indicating if bitwidth is locked from further modification */
> +	bool locked;
> +	uint16_t bitwidth; /**< bitwidth value */
> +};
> +
>  /**
>   * internal configuration
>   */
> @@ -85,6 +91,8 @@ struct internal_config {
>  	volatile unsigned int init_complete;
>  	/**< indicates whether EAL has completed initialization */
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
> +	/** max simd bitwidth path to use */
> +	struct simd_bitwidth max_simd_bitwidth;
>  };
>  
>  void eal_reset_internal_config(struct internal_config *internal_cfg);
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index 89769d48b4..ef33979664 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -85,6 +85,8 @@ enum {
>  	OPT_TELEMETRY_NUM,
>  #define OPT_NO_TELEMETRY      "no-telemetry"
>  	OPT_NO_TELEMETRY_NUM,
> +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> +	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
>  	OPT_LONG_MAX_NUM
>  };
>  
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..fb739f3474 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>  	RTE_PROC_INVALID
>  };
>  
> +enum rte_max_simd_t {
> +	RTE_NO_SIMD = 64,
> +	RTE_MAX_128_SIMD = 128,
> +	RTE_MAX_256_SIMD = 256,
> +	RTE_MAX_512_SIMD = 512,
> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> +};

What is the difference between RTE_NO_SIMD and RTE_MAX_SIMD_DISABLE?

The default value in internal_config is 0, so in my understanding
rte_get_max_simd_bitwidth() will return 0 if --force-max-simd-bitwidth
is not passed. Is it expected?

Maybe I'm missing something, but I don't understand why the value in
internal_config is not set to the maximum supported SIMD bitwidth by
default, and optionally overridden by the command line argument, or by
the API.


> +
>  /**
>   * Get the process type in a multi-process setup
>   *
> @@ -51,6 +59,31 @@ enum rte_proc_type_t {
>   */
>  enum rte_proc_type_t rte_eal_process_type(void);
>  
> +/**
> + * Get the supported SIMD bitwidth.
> + *
> + * @return
> + *   uint16_t bitwidth.
> + */
> +__rte_experimental
> +uint16_t rte_get_max_simd_bitwidth(void);
> +
> +/**
> + * Set the supported SIMD bitwidth.
> + * This API should only be called once at initialization, before EAL init.
> + *
> + * @param bitwidth
> + *   uint16_t bitwidth.
> + * @return
> + *   0 on success.
> + * @return
> + *   -EINVAL on invalid bitwidth parameter.
> + * @return
> + *   -EPERM if bitwidth is locked.
> + */
> +__rte_experimental
> +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> +
>  /**
>   * Request iopl privilege for all RPL.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index c32461c663..17a7195a3d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -397,6 +397,10 @@ EXPERIMENTAL {
>  	rte_service_lcore_may_be_active;
>  	rte_thread_register;
>  	rte_thread_unregister;
> +
> +	# added in 20.11
> +	rte_get_max_simd_bitwidth;
> +	rte_set_max_simd_bitwidth;
>  };
>  
>  INTERNAL {
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 17/18] net: " Ciara Power
  2020-09-30 15:03     ` Coyle, David
@ 2020-10-06  9:58     ` Olivier Matz
  1 sibling, 0 replies; 276+ messages in thread
From: Olivier Matz @ 2020-10-06  9:58 UTC (permalink / raw)
  To: Ciara Power; +Cc: dev, Jasvinder Singh

Hi,

On Wed, Sep 30, 2020 at 02:04:13PM +0100, Ciara Power wrote:
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> The vector path was initially chosen in RTE_INIT, however this is no
> longer suitable as we cannot check the max SIMD bitwidth at that time.
> The default chosen in RTE_INIT is now scalar. For best performance
> and to use vector paths, apps must explicitly call the set algorithm
> function before using other functions from this library, as this is
> where vector handlers are now chosen.
> 
> Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>   - Moved choosing vector paths out of RTE_INIT.
>   - Moved checking max_simd_bitwidth into the set_alg function.
> ---
>  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
>  lib/librte_net/rte_net_crc.h |  3 ++-
>  2 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
> index 9fd4794a9d..241eb16399 100644
> --- a/lib/librte_net/rte_net_crc.c
> +++ b/lib/librte_net/rte_net_crc.c
> @@ -9,6 +9,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_common.h>
>  #include <rte_net_crc.h>
> +#include <rte_eal.h>
>  
>  #if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
>  #define X86_64_SSE42_PCLMULQDQ     1
> @@ -60,6 +61,9 @@ static rte_net_crc_handler handlers_neon[] = {
>  };
>  #endif
>  
> +static uint16_t max_simd_bitwidth;
> +#define RTE_LOGTYPE_NET RTE_LOGTYPE_USER1

RTE_LOG_REGISTER() should be used instead.

> +
>  /**
>   * Reflect the bits about the middle
>   *
> @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
>  void
>  rte_net_crc_set_alg(enum rte_net_crc_alg alg)
>  {
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
>  	switch (alg) {
>  #ifdef X86_64_SSE42_PCLMULQDQ
>  	case RTE_NET_CRC_SSE42:
> -		handlers = handlers_sse42;
> -		break;
> +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> +			handlers = handlers_sse42;
> +			return;
> +		}
> +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using scalar\n");

If max_simd_bitwidth is too low, it will keep the previous value.
I think we should avoid saying "using scalar" in the log, even if it is
correct today. For instance, when the AVX implementation is added,
the log will become wrong.


>  #elif defined ARM64_NEON_PMULL
>  		/* fall-through */
>  	case RTE_NET_CRC_NEON:
> -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> +				max_simd_bitwidth >= RTE_MAX_128_SIMD) {
>  			handlers = handlers_neon;
> -			break;
> +			return;
>  		}
> +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low or CPU flag not enabled, using scalar\n");
>  #endif
>  		/* fall-through */
>  	case RTE_NET_CRC_SCALAR:
> @@ -184,19 +196,15 @@ rte_net_crc_calc(const void *data,
>  /* Select highest available crc algorithm as default one */
>  RTE_INIT(rte_net_crc_init)
>  {
> -	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
> -
>  	rte_net_crc_scalar_init();
>  
>  #ifdef X86_64_SSE42_PCLMULQDQ
> -	alg = RTE_NET_CRC_SSE42;
>  	rte_net_crc_sse42_init();
>  #elif defined ARM64_NEON_PMULL
>  	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> -		alg = RTE_NET_CRC_NEON;
>  		rte_net_crc_neon_init();
>  	}
>  #endif
>  
> -	rte_net_crc_set_alg(alg);
> +	rte_net_crc_set_alg(RTE_NET_CRC_SCALAR);
>  }
> diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
> index 16e85ca970..7a45ebe193 100644
> --- a/lib/librte_net/rte_net_crc.h
> +++ b/lib/librte_net/rte_net_crc.h
> @@ -28,7 +28,8 @@ enum rte_net_crc_alg {
>  /**
>   * This API set the CRC computation algorithm (i.e. scalar version,
>   * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> - * structure.
> + * structure. This should be called before any other functions, to
> + * choose the algorithm for best performance.
>   *
>   * @param alg
>   *   This parameter is used to select the CRC implementation version.
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-10-01 14:19           ` Power, Ciara
@ 2020-10-06 10:00             ` Olivier Matz
  2020-10-07 11:16               ` Power, Ciara
  2020-10-08 14:55               ` Ananyev, Konstantin
  0 siblings, 2 replies; 276+ messages in thread
From: Olivier Matz @ 2020-10-06 10:00 UTC (permalink / raw)
  To: Power, Ciara
  Cc: Coyle, David, Singh, Jasvinder, dev, O'loingsigh, Mairtin,
	Ryan, Brendan, Richardson, Bruce

Hi,

On Thu, Oct 01, 2020 at 02:19:37PM +0000, Power, Ciara wrote:
> Hi David,
> 
> Thanks for reviewing, 
> 
> >-----Original Message-----
> >From: Coyle, David <david.coyle@intel.com>
> >Sent: Thursday 1 October 2020 15:17
> >To: Singh, Jasvinder <jasvinder.singh@intel.com>; Power, Ciara
> ><ciara.power@intel.com>; dev@dpdk.org
> >Cc: Power, Ciara <ciara.power@intel.com>; Olivier Matz
> ><olivier.matz@6wind.com>; O'loingsigh, Mairtin
> ><mairtin.oloingsigh@intel.com>; Ryan, Brendan <brendan.ryan@intel.com>;
> >Richardson, Bruce <bruce.richardson@intel.com>
> >Subject: RE: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD
> >bitwidth
> >
> >Hi Jasvinder/Ciara
> >
> >> -----Original Message-----
> >> From: Singh, Jasvinder <jasvinder.singh@intel.com>
> >> >
> >> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power When
> >> > > choosing a vector path to take, an extra condition must be
> >> > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> >path.
> >> > >
> >> > > The vector path was initially chosen in RTE_INIT, however this is
> >> > > no longer suitable as we cannot check the max SIMD bitwidth at that
> >time.
> >> > > The default chosen in RTE_INIT is now scalar. For best performance
> >> > > and to use vector paths, apps must explicitly call the set
> >> > > algorithm function before using other functions from this library,
> >> > > as this is where vector handlers are now chosen.
> >> >
> >> > [DC] Has it been decided that it is ok to now require applications
> >> > to pick the CRC algorithm they want to use?
> >> >
> >> > An application which previously automatically got SSE4.2 CRC, for
> >> > example, will now automatically only get scalar.
> >> >
> >> > If this is ok, this should probably be called out explicitly in
> >> > release notes as it may not be Immediately noticeable to users that
> >> > they now need to select the CRC algo.
> >> >
> >> > Actually, in general, the release notes need to be updated for this
> >> patchset.
> >>
> >> The decision to move rte_set_alg() out of RTE_INIT was taken to avoid
> >> check on max_simd_bitwidth in data path for every single time when
> >> crc_calc() api is invoked. Based on my understanding,
> >> max_simd_bitwidth is set after eal init, and when used in crc_calc(),
> >> it might override the default crc algo set during RTE_INIT. Therefore,
> >> to avoid extra check on max_simd_bitwidth in data path,  better option
> >> will be to use this static configuration one time after eal init in the set_algo
> >API.
> >
> >[DC] Yes that is a good change to have made to avoid extra datapath checks.
> >
> >Based on off-list discussion, I now also know the reason behind now
> >defaulting to scalar CRC in RTE_INIT. If a higher bitwidth CRC was chosen by
> >RTE_INIT (e.g.
> >SSE4.2 CRC) but the max_simd_bitwidth was then set to RTE_NO_SIMD (64)
> >through the EAL parameter or call to rte_set_max_simd_bitwidth(), then
> >there is a mismatch if rte_net_crc_set_alg() is not then called to reconfigure
> >the CRC. Defaulting to scalar avoids this mismatch and works on all archs
> >
> >As I mentioned before, I think this needs to be called out in release notes, as
> >it's an under-the-hood change which could cause app performance to drop if
> >app developers aren't aware of it - the API itself hasn't changed, so they may
> >not read the doxygen :)
> >
> 
> Yes that is a good point, I can add to the release notes for this to call it out. 

I don't think it is a good idea to have the scalar crc by default.
To me, the fastest available CRC has to be enabled by default.

I understand the technical reason why you did it like this however: the
SIMD bitwidth may not be known at the time the
RTE_INIT(rte_net_crc_init) function is called.

A simple approach to solve this issue would be to initialize the
rte_net_crc_handler pointer to a handlers_default. The first time a crc
is called, the rte_crc32_*_default_handler() function would check the
configured SIMD bitwidth, and set the handler to the correct one, so
that the check is not repeated on subsequent calls.

This approach still does not solve the case where the SIMD bitwidth is
modified during the life of the application. In this case, a callback
would have to be registered to notify SIMD bitwidth changes... but I
don't think it is worth doing. Instead, it can be documented that
rte_set_max_simd_bitwidth() has to be called early, i.e. before
rte_eal_init().



> >>
> >>
> >> > >
> >> > > Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> >> > >
> >> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> >> > >
> >> > > ---
> >> > > v3:
> >> > >   - Moved choosing vector paths out of RTE_INIT.
> >> > >   - Moved checking max_simd_bitwidth into the set_alg function.
> >> > > ---
> >> > >  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
> >> > > lib/librte_net/rte_net_crc.h |  3 ++-
> >> > >  2 files changed, 19 insertions(+), 10 deletions(-)
> >> > >
> >> > > diff --git a/lib/librte_net/rte_net_crc.c
> >> > > b/lib/librte_net/rte_net_crc.c index
> >> > > 9fd4794a9d..241eb16399 100644
> >> > > --- a/lib/librte_net/rte_net_crc.c
> >> > > +++ b/lib/librte_net/rte_net_crc.c
> >> >
> >> > <snip>
> >> >
> >> > > @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data,
> >> > > uint32_t data_len)  void  rte_net_crc_set_alg(enum rte_net_crc_alg
> >> > > alg)  {
> >> > > +	if (max_simd_bitwidth == 0)
> >> > > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> >> > > +
> >> > >  	switch (alg) {
> >> > >  #ifdef X86_64_SSE42_PCLMULQDQ
> >> > >  	case RTE_NET_CRC_SSE42:
> >> > > -		handlers = handlers_sse42;
> >> > > -		break;
> >> > > +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> >> > > +			handlers = handlers_sse42;
> >> > > +			return;
> >> > > +		}
> >> > > +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using
> >> > > scalar\n");
> >> >
> >> > [DC] Not sure if you're aware but there is another patchset which
> >> > adds an
> >> > AVX512 CRC implementation and run-time checking of cpuflags to
> >> > select the CRC path to use:
> >> > https://patchwork.dpdk.org/project/dpdk/list/?series=12596
> >> >
> >> > There will be a task to merge these 2 patchsets if both are merged.
> >> > It looks fairly straightforward to me to merge these, but it would
> >> > be good if you take a look too
> 
> I have looked at that patchset, I agree, I think they will be straightforward to merge together.
> 
> Thanks,
> Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
  2020-10-01 14:49     ` Coyle, David
  2020-10-06  9:32     ` Olivier Matz
@ 2020-10-06 11:50     ` Maxime Coquelin
  2020-10-07 10:58       ` Power, Ciara
  2020-10-08 13:07     ` Ananyev, Konstantin
                       ` (2 subsequent siblings)
  5 siblings, 1 reply; 276+ messages in thread
From: Maxime Coquelin @ 2020-10-06 11:50 UTC (permalink / raw)
  To: Ciara Power, dev; +Cc: Ray Kinsella, Neil Horman



On 9/30/20 3:03 PM, Ciara Power wrote:
> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> for an app to set this value. It can also be set using EAL argument
> --force-max-simd-bitwidth, which will lock the value and override any
> modifications made by the app.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>   - Added enum value to essentially disable using max SIMD to choose
>     paths, intended for use by ARM SVE.
>   - Fixed parsing bitwidth argument to return an error for values
>     greater than uint16_t.
> v2: Added to Doxygen comment for API.
> ---
>  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  5 files changed, 111 insertions(+)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index a5426e1234..e9117a96af 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -102,6 +102,7 @@ eal_long_options[] = {
>  	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>  	{0,                     0, NULL, 0                        }
>  };
>  
> @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
>  	return 0;
>  }
>  
> +static int
> +eal_parse_simd_bitwidth(const char *arg, bool locked)
> +{
> +	char *end;
> +	unsigned long bitwidth;
> +	int ret;
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +
> +	if (arg == NULL || arg[0] == '\0')
> +		return -1;
> +
> +	errno = 0;
> +	bitwidth = strtoul(arg, &end, 0);
> +
> +	/* check for errors */
> +	if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
> +		return -1;
> +
> +	if (bitwidth == 0)
> +		bitwidth = UINT16_MAX;

(unsigned long)RTE_MAX_SIMD_DISABLE instead?

> +	ret = rte_set_max_simd_bitwidth(bitwidth);
> +	if (ret < 0)
> +		return -1;
> +	internal_conf->max_simd_bitwidth.locked = locked;
> +	return 0;
> +}
> +
>  static int
>  eal_parse_base_virtaddr(const char *arg)
>  {
> @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
>  	case OPT_NO_TELEMETRY_NUM:
>  		conf->no_telemetry = 1;
>  		break;
> +	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> +		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> +			return -1;
> +		}
> +		break;
>  
>  	/* don't know what to do, leave this to caller */
>  	default:
> @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
>  	return 0;
>  }
>  
> +uint16_t

shouldn't it return an enum rte_max_simd_t?

> +rte_get_max_simd_bitwidth(void)
> +{
> +	const struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	return internal_conf->max_simd_bitwidth.bitwidth;

What is the default value if not set?

> +}
> +
> +int
> +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> +{
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	if (internal_conf->max_simd_bitwidth.locked) {
> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> +		return -EPERM;
> +	}
> +
> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> +			!rte_is_power_of_2(bitwidth))) {
> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> +		return -EINVAL;
> +	}
> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> +	return 0;
> +}
> +
>  void
>  eal_common_usage(void)
>  {
> @@ -1981,6 +2044,7 @@ eal_common_usage(void)
>  	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
>  	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
>  	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> +	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
>  	       "\nEAL options for DEBUG use only:\n"
>  	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
>  	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 13f93388a7..367e0cc19e 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -33,6 +33,12 @@ struct hugepage_info {
>  	int lock_descriptor;    /**< file descriptor for hugepage dir */
>  };
>  
> +struct simd_bitwidth {
> +	/**< flag indicating if bitwidth is locked from further modification */
> +	bool locked;
> +	uint16_t bitwidth; /**< bitwidth value */
> +};
> +
>  /**
>   * internal configuration
>   */
> @@ -85,6 +91,8 @@ struct internal_config {
>  	volatile unsigned int init_complete;
>  	/**< indicates whether EAL has completed initialization */
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
> +	/** max simd bitwidth path to use */
> +	struct simd_bitwidth max_simd_bitwidth;
>  };
>  
>  void eal_reset_internal_config(struct internal_config *internal_cfg);
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index 89769d48b4..ef33979664 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -85,6 +85,8 @@ enum {
>  	OPT_TELEMETRY_NUM,
>  #define OPT_NO_TELEMETRY      "no-telemetry"
>  	OPT_NO_TELEMETRY_NUM,
> +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> +	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
>  	OPT_LONG_MAX_NUM
>  };
>  
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..fb739f3474 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>  	RTE_PROC_INVALID
>  };
>  
> +enum rte_max_simd_t {
> +	RTE_NO_SIMD = 64,
> +	RTE_MAX_128_SIMD = 128,
> +	RTE_MAX_256_SIMD = 256,
> +	RTE_MAX_512_SIMD = 512,
> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> +};
> +
>  /**
>   * Get the process type in a multi-process setup
>   *
> @@ -51,6 +59,31 @@ enum rte_proc_type_t {
>   */
>  enum rte_proc_type_t rte_eal_process_type(void);
>  
> +/**
> + * Get the supported SIMD bitwidth.
> + *
> + * @return
> + *   uint16_t bitwidth.
> + */
> +__rte_experimental
> +uint16_t rte_get_max_simd_bitwidth(void);
> +
> +/**
> + * Set the supported SIMD bitwidth.
> + * This API should only be called once at initialization, before EAL init.
> + *
> + * @param bitwidth
> + *   uint16_t bitwidth.
> + * @return
> + *   0 on success.
> + * @return
> + *   -EINVAL on invalid bitwidth parameter.
> + * @return
> + *   -EPERM if bitwidth is locked.
> + */
> +__rte_experimental
> +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> +
>  /**
>   * Request iopl privilege for all RPL.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index c32461c663..17a7195a3d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -397,6 +397,10 @@ EXPERIMENTAL {
>  	rte_service_lcore_may_be_active;
>  	rte_thread_register;
>  	rte_thread_unregister;
> +
> +	# added in 20.11
> +	rte_get_max_simd_bitwidth;
> +	rte_set_max_simd_bitwidth;
>  };
>  
>  INTERNAL {
> 


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 14/18] distributor: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 14/18] distributor: " Ciara Power
@ 2020-10-06 12:17     ` David Hunt
  0 siblings, 0 replies; 276+ messages in thread
From: David Hunt @ 2020-10-06 12:17 UTC (permalink / raw)
  To: Ciara Power, dev

Hi Ciara,

On 30/9/2020 2:04 PM, Ciara Power wrote:
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
>
> Cc: David Hunt <david.hunt@intel.com>
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>   lib/librte_distributor/rte_distributor.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
> index 1c047f065a..9f0a9b1d48 100644
> --- a/lib/librte_distributor/rte_distributor.c
> +++ b/lib/librte_distributor/rte_distributor.c
> @@ -636,7 +636,8 @@ rte_distributor_create(const char *name,
>   
>   	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
>   #if defined(RTE_ARCH_X86)
> -	d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
> +	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> +		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
>   #endif
>   
>   	/*


Acked-by: David Hunt <david.hunt@intel.com>



^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 16/18] efd: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 16/18] efd: " Ciara Power
@ 2020-10-07  0:51     ` Wang, Yipeng1
  0 siblings, 0 replies; 276+ messages in thread
From: Wang, Yipeng1 @ 2020-10-07  0:51 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Marohn, Byron

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 6:04 AM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Marohn, Byron
> <byron.marohn@intel.com>; Wang, Yipeng1 <yipeng1.wang@intel.com>
> Subject: [PATCH v3 16/18] efd: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Byron Marohn <byron.marohn@intel.com>
> Cc: Yipeng Wang <yipeng1.wang@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_efd/rte_efd.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c index
> 6a799556d4..509ecc8256 100644
> --- a/lib/librte_efd/rte_efd.c
> +++ b/lib/librte_efd/rte_efd.c
> @@ -645,7 +645,9 @@ rte_efd_create(const char *name, uint32_t
> max_num_rules, uint32_t key_len,
>  	 * For less than 4 bits, scalar function performs better
>  	 * than vectorised version
>  	 */
> -	if (RTE_EFD_VALUE_NUM_BITS > 3 &&
> rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +	if (RTE_EFD_VALUE_NUM_BITS > 3
> +			&& rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)
> +			&& rte_get_max_simd_bitwidth() >=
> RTE_MAX_256_SIMD)
>  		table->lookup_fn = EFD_LOOKUP_AVX2;
>  	else
>  #endif
> @@ -655,7 +657,8 @@ rte_efd_create(const char *name, uint32_t
> max_num_rules, uint32_t key_len,
>  	 * than vectorised version
>  	 */
>  	if (RTE_EFD_VALUE_NUM_BITS > 16 &&
> -	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
> +	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
> +			rte_get_max_simd_bitwidth() >=
> RTE_MAX_128_SIMD)
>  		table->lookup_fn = EFD_LOOKUP_NEON;
>  	else
>  #endif
> --
> 2.17.1
[Wang, Yipeng] 
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 15/18] member: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 15/18] member: " Ciara Power
@ 2020-10-07  0:51     ` Wang, Yipeng1
  0 siblings, 0 replies; 276+ messages in thread
From: Wang, Yipeng1 @ 2020-10-07  0:51 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Gobriel, Sameh

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 6:04 AM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Wang, Yipeng1
> <yipeng1.wang@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>
> Subject: [PATCH v3 15/18] member: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Yipeng Wang <yipeng1.wang@intel.com>
> Cc: Sameh Gobriel <sameh.gobriel@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_member/rte_member_ht.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_member/rte_member_ht.c
> b/lib/librte_member/rte_member_ht.c
> index cbcd0d4407..71e3cf7b52 100644
> --- a/lib/librte_member/rte_member_ht.c
> +++ b/lib/librte_member/rte_member_ht.c
> @@ -113,7 +113,8 @@ rte_member_create_ht(struct rte_member_setsum
> *ss,
>  	}
>  #if defined(RTE_ARCH_X86)
>  	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> -			RTE_MEMBER_BUCKET_ENTRIES == 16)
> +			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
> +			rte_get_max_simd_bitwidth() >=
> RTE_MAX_256_SIMD)
>  		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
>  	else
>  #endif
> --
> 2.17.1
[Wang, Yipeng] 
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v2 08/17] net/fm10k: add checks for max SIMD bitwidth
  2020-08-27 16:12   ` [dpdk-dev] [PATCH v2 08/17] net/fm10k: " Ciara Power
@ 2020-10-07  5:01     ` Wang, Xiao W
  0 siblings, 0 replies; 276+ messages in thread
From: Wang, Xiao W @ 2020-10-07  5:01 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Zhang, Qi Z

Hi Ciara,

BRs,
Xiao

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Friday, August 28, 2020 12:13 AM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Wang, Xiao W <xiao.w.wang@intel.com>
> Subject: [PATCH v2 08/17] net/fm10k: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: Qi Zhang <qi.z.zhang@intel.com>
> Cc: Xiao Wang <xiao.w.wang@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/fm10k/fm10k_ethdev.c
> b/drivers/net/fm10k/fm10k_ethdev.c
> index b574693bca..f7c41d4377 100644
> --- a/drivers/net/fm10k/fm10k_ethdev.c
> +++ b/drivers/net/fm10k/fm10k_ethdev.c
> @@ -2937,7 +2937,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
>  		/* primary process has set the ftag flag and offloads */
>  		txq = dev->data->tx_queues[0];
> -		if (fm10k_tx_vec_condition_check(txq)) {
> +		if (fm10k_tx_vec_condition_check(txq) ||
> +				rte_get_max_simd_bitwidth()
> +				< RTE_MAX_128_SIMD) {
>  			dev->tx_pkt_burst = fm10k_xmit_pkts;
>  			dev->tx_pkt_prepare = fm10k_prep_pkts;
>  			PMD_INIT_LOG(DEBUG, "Use regular Tx func");
> @@ -2956,7 +2958,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
>  		txq = dev->data->tx_queues[i];
>  		txq->tx_ftag_en = tx_ftag_en;
>  		/* Check if Vector Tx is satisfied */
> -		if (fm10k_tx_vec_condition_check(txq))
> +		if (fm10k_tx_vec_condition_check(txq) ||
> +				rte_get_max_simd_bitwidth() <
> RTE_MAX_128_SIMD)
>  			use_sse = 0;
>  	}
> 
> @@ -2990,7 +2993,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
>  	 * conditions to be met.
>  	 */
>  	if (!fm10k_rx_vec_condition_check(dev) &&
> -			dev_info->rx_vec_allowed && !rx_ftag_en) {
> +			dev_info->rx_vec_allowed && !rx_ftag_en &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  		if (dev->data->scattered_rx)
>  			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
>  		else
> --
> 2.17.1

Acked-by: Xiao Wang <xiao.w.wang@intel.com>

Thanks~


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-06  9:32     ` Olivier Matz
@ 2020-10-07 10:47       ` Power, Ciara
  2020-10-07 10:52         ` Bruce Richardson
  2020-10-07 11:18         ` Olivier Matz
  0 siblings, 2 replies; 276+ messages in thread
From: Power, Ciara @ 2020-10-07 10:47 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev, Ray Kinsella, Neil Horman, Richardson, Bruce

Hi Olivier,

Thanks for reviewing, some comments below.


>-----Original Message-----
>From: Olivier Matz <olivier.matz@6wind.com>
>Sent: Tuesday 6 October 2020 10:32
>To: Power, Ciara <ciara.power@intel.com>
>Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>
>Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>
>Hi Ciara,
>
>Please find some comments below.
>
>On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
>> This patch adds a max SIMD bitwidth EAL configuration. The API allows
>> for an app to set this value. It can also be set using EAL argument
>> --force-max-simd-bitwidth, which will lock the value and override any
>> modifications made by the app.
>>
>> Signed-off-by: Ciara Power <ciara.power@intel.com>
>>
>> ---
>> v3:
>>   - Added enum value to essentially disable using max SIMD to choose
>>     paths, intended for use by ARM SVE.
>>   - Fixed parsing bitwidth argument to return an error for values
>>     greater than uint16_t.
>> v2: Added to Doxygen comment for API.
>> ---

<snip>

>>
>> +uint16_t
>> +rte_get_max_simd_bitwidth(void)
>> +{
>> +	const struct internal_config *internal_conf =
>> +		eal_get_internal_configuration();
>> +	return internal_conf->max_simd_bitwidth.bitwidth;
>> +}
>
>Should the return value be enum rte_max_simd_t?
>If not, do we really need the enum definition?
>

I kept the return value and param value below as uint16_t to allow for arbitrary values,
which also makes it more flexible for future additions, as new enum values won't need to be added.
For the set function below, this is used when a user passes the EAL command line flag, which
passes an integer value rather than an enum one.
The enums are useful when checking the max_simd_bitwidth in drivers/libs, for example using
"RTE_MAX_256_SIMD" instead of "256" in the condition checks.

>> +
>> +int
>> +rte_set_max_simd_bitwidth(uint16_t bitwidth) {
>> +	struct internal_config *internal_conf =
>> +		eal_get_internal_configuration();
>> +	if (internal_conf->max_simd_bitwidth.locked) {
>> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user
>runtime override enabled");
>> +		return -EPERM;
>> +	}
>> +
>> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth <
>RTE_NO_SIMD ||
>> +			!rte_is_power_of_2(bitwidth))) {
>> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
>> +		return -EINVAL;
>> +	}
>> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
>> +	return 0;
>> +}
>
>Same question, should the parameter be enum rte_max_simd_t?
>

<snip>

>> +enum rte_max_simd_t {
>> +	RTE_NO_SIMD = 64,
>> +	RTE_MAX_128_SIMD = 128,
>> +	RTE_MAX_256_SIMD = 256,
>> +	RTE_MAX_512_SIMD = 512,
>> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
>> +};
>
>What is the difference between RTE_NO_SIMD and
>RTE_MAX_SIMD_DISABLE?

RTE_NO_SIMD has value 64 to limit paths to scalar only.
RTE_MAX_SIMD_DISABLE sets the highest value possible,
so essentially disables the limit affecting which vector paths are taken.
This disable option was added to allow for ARM SVE, which will be added later,
as discussed with Honnappa on a previous version: https://patchwork.dpdk.org/patch/76097/

>The default value in internal_config is 0, so in my understanding
>rte_get_max_simd_bitwidth() will return 0 if --force-max-simd-bitwidth is
>not passed. Is it expected?
>
>Maybe I'm missing something, but I don't understand why the value in
>internal_config is not set to the maximum supported SIMD bitwidth by
>default, and optionally overriden by the command line argument, or by the
>API.
>

The default value for max_simd_bitwidth is set depending on the architecture, 256 for x86/ppc,
and UINT16_MAX for ARM. So for example the default on x86 allows for AVX2 and under.
The defaults can be seen in patch 2: https://patchwork.dpdk.org/patch/79339/ 

<snip>

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-07 10:47       ` Power, Ciara
@ 2020-10-07 10:52         ` Bruce Richardson
  2020-10-07 11:10           ` Power, Ciara
  2020-10-07 11:18         ` Olivier Matz
  1 sibling, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-10-07 10:52 UTC (permalink / raw)
  To: Power, Ciara; +Cc: Olivier Matz, dev, Ray Kinsella, Neil Horman

On Wed, Oct 07, 2020 at 11:47:34AM +0100, Power, Ciara wrote:
> Hi Olivier,
> 
> Thanks for reviewing, some comments below.
> 
> 
> >-----Original Message-----
> >From: Olivier Matz <olivier.matz@6wind.com>
> >Sent: Tuesday 6 October 2020 10:32
> >To: Power, Ciara <ciara.power@intel.com>
> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
> ><nhorman@tuxdriver.com>
> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
> >
> >Hi Ciara,
> >
> >Please find some comments below.
> >
> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
> >> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> >> for an app to set this value. It can also be set using EAL argument
> >> --force-max-simd-bitwidth, which will lock the value and override any
> >> modifications made by the app.
> >>
> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> >>
> >> ---
> >> v3:
> >>   - Added enum value to essentially disable using max SIMD to choose
> >>     paths, intended for use by ARM SVE.
> >>   - Fixed parsing bitwidth argument to return an error for values
> >>     greater than uint16_t.
> >> v2: Added to Doxygen comment for API.
> >> ---
> 
> <snip>
> 
> >>
> >> +uint16_t
> >> +rte_get_max_simd_bitwidth(void)
> >> +{
> >> +const struct internal_config *internal_conf =
> >> +eal_get_internal_configuration();
> >> +return internal_conf->max_simd_bitwidth.bitwidth;
> >> +}
> >
> >Should the return value be enum rte_max_simd_t?
> >If not, do we really need the enum definition?
> >
> 
> I kept the return value and param value below as uint16_t to allow for arbitrary values,
> and will allow it be more flexible for future additions as new enums won't need to be added.
> For the set function below, this is used when a user passes the EAL command line flag, which
> passes an integer value rather than an enum one.
> The enums are useful when checking the max_simd_bitwidth in drivers/libs, for example using
> "RTE_MAX_256_SIMD" instead of "256" in the condition checks.
> 
So basically these enum values are #defines for readability, just in enum
form, right?

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-06 11:50     ` Maxime Coquelin
@ 2020-10-07 10:58       ` Power, Ciara
  0 siblings, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-10-07 10:58 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Ray Kinsella, Neil Horman

Hi Maxime,

Thanks for reviewing, some comments below.

 
>-----Original Message-----
>From: Maxime Coquelin <maxime.coquelin@redhat.com>
>Sent: Tuesday 6 October 2020 12:50
>To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>
>
>
>On 9/30/20 3:03 PM, Ciara Power wrote:
>> This patch adds a max SIMD bitwidth EAL configuration. The API allows
>> for an app to set this value. It can also be set using EAL argument
>> --force-max-simd-bitwidth, which will lock the value and override any
>> modifications made by the app.
>>
>> Signed-off-by: Ciara Power <ciara.power@intel.com>
>>
>> ---
>> v3:
>>   - Added enum value to essentially disable using max SIMD to choose
>>     paths, intended for use by ARM SVE.
>>   - Fixed parsing bitwidth argument to return an error for values
>>     greater than uint16_t.
>> v2: Added to Doxygen comment for API.
>> ---
<snip>
>> @@ -1903,6 +1939,33 @@ eal_check_common_options(struct
>internal_config *internal_cfg)
>>  	return 0;
>>  }
>>
>> +uint16_t
>
>shouldn't it return an enum rte_max_simd_t?

I kept the return value as uint16_t to allow for arbitrary values,
which also makes it more flexible for future additions, as new enum values won't need to be added.
The enums are really used for readability when checking the bitwidth limit in drivers/libs, so
essentially #defines in enum form.

>
>> +rte_get_max_simd_bitwidth(void)
>> +{
>> +	const struct internal_config *internal_conf =
>> +		eal_get_internal_configuration();
>> +	return internal_conf->max_simd_bitwidth.bitwidth;
>
>What is the default value if not set?
>

The default value for max_simd_bitwidth is set depending on the architecture, 256 for x86/ppc,
and UINT16_MAX for ARM. So for example the default on x86 allows for AVX2 and under.
The defaults can be seen in patch 2: https://patchwork.dpdk.org/patch/79339/ 

<snip>

Thanks,
Ciara


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-07 10:52         ` Bruce Richardson
@ 2020-10-07 11:10           ` Power, Ciara
  0 siblings, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-10-07 11:10 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: Olivier Matz, dev, Ray Kinsella, Neil Horman

Hi Bruce,

>-----Original Message-----
>From: Bruce Richardson <bruce.richardson@intel.com>
>Sent: Wednesday 7 October 2020 11:52
>To: Power, Ciara <ciara.power@intel.com>
>Cc: Olivier Matz <olivier.matz@6wind.com>; dev@dpdk.org; Ray Kinsella
><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>
>On Wed, Oct 07, 2020 at 11:47:34AM +0100, Power, Ciara wrote:
>> Hi Olivier,
>>
>> Thanks for reviewing, some comments below.
>>
>>
>> >-----Original Message-----
>> >From: Olivier Matz <olivier.matz@6wind.com>
>> >Sent: Tuesday 6 October 2020 10:32
>> >To: Power, Ciara <ciara.power@intel.com>
>> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>> ><nhorman@tuxdriver.com>
>> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>> >
>> >Hi Ciara,
>> >
>> >Please find some comments below.
>> >
>> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
>> >> This patch adds a max SIMD bitwidth EAL configuration. The API
>> >> allows for an app to set this value. It can also be set using EAL
>> >> argument --force-max-simd-bitwidth, which will lock the value and
>> >> override any modifications made by the app.
>> >>
>> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
>> >>
>> >> ---
>> >> v3:
>> >>   - Added enum value to essentially disable using max SIMD to choose
>> >>     paths, intended for use by ARM SVE.
>> >>   - Fixed parsing bitwidth argument to return an error for values
>> >>     greater than uint16_t.
>> >> v2: Added to Doxygen comment for API.
>> >> ---
>>
>> <snip>
>>
>> >>
>> >> +uint16_t
>> >> +rte_get_max_simd_bitwidth(void)
>> >> +{
>> >> +const struct internal_config *internal_conf =
>> >> +eal_get_internal_configuration();
>> >> +return internal_conf->max_simd_bitwidth.bitwidth;
>> >> +}
>> >
>> >Should the return value be enum rte_max_simd_t?
>> >If not, do we really need the enum definition?
>> >
>>
>> I kept the return value and param value below as uint16_t to allow for
>> arbitrary values, and will allow it be more flexible for future additions as
>new enums won't need to be added.
>> For the set function below, this is used when a user passes the EAL
>> command line flag, which passes an integer value rather than an enum one.
>> The enums are useful when checking the max_simd_bitwidth in
>> drivers/libs, for example using "RTE_MAX_256_SIMD" instead of "256" in
>the condition checks.
>>
>So basically these enum values are #defines for readability, just in enum
>form, right?

Yes, that's exactly right.

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-10-06 10:00             ` Olivier Matz
@ 2020-10-07 11:16               ` Power, Ciara
  2020-10-08 14:55               ` Ananyev, Konstantin
  1 sibling, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-10-07 11:16 UTC (permalink / raw)
  To: Olivier Matz
  Cc: Coyle, David, Singh, Jasvinder, dev, O'loingsigh, Mairtin,
	Ryan, Brendan, Richardson, Bruce

Hi Olivier,

 
>-----Original Message-----
>From: Olivier Matz <olivier.matz@6wind.com>
>Sent: Tuesday 6 October 2020 11:01
>To: Power, Ciara <ciara.power@intel.com>
>Cc: Coyle, David <david.coyle@intel.com>; Singh, Jasvinder
><jasvinder.singh@intel.com>; dev@dpdk.org; O'loingsigh, Mairtin
><mairtin.oloingsigh@intel.com>; Ryan, Brendan <brendan.ryan@intel.com>;
>Richardson, Bruce <bruce.richardson@intel.com>
>Subject: Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD
>bitwidth
>
>Hi,
>
>On Thu, Oct 01, 2020 at 02:19:37PM +0000, Power, Ciara wrote:
>> Hi David,
>>
>> Thanks for reviewing,
>>
>> >-----Original Message-----
>> >From: Coyle, David <david.coyle@intel.com>
>> >Sent: Thursday 1 October 2020 15:17
>> >To: Singh, Jasvinder <jasvinder.singh@intel.com>; Power, Ciara
>> ><ciara.power@intel.com>; dev@dpdk.org
>> >Cc: Power, Ciara <ciara.power@intel.com>; Olivier Matz
>> ><olivier.matz@6wind.com>; O'loingsigh, Mairtin
>> ><mairtin.oloingsigh@intel.com>; Ryan, Brendan
>> ><brendan.ryan@intel.com>; Richardson, Bruce
>> ><bruce.richardson@intel.com>
>> >Subject: RE: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD
>> >bitwidth
>> >
>> >Hi Jasvinder/Ciara
>> >
>> >> -----Original Message-----
>> >> From: Singh, Jasvinder <jasvinder.singh@intel.com>
>> >> >
>> >> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power When
>> >> > > choosing a vector path to take, an extra condition must be
>> >> > > satisfied to ensure the max SIMD bitwidth allows for the CPU
>> >> > > enabled
>> >path.
>> >> > >
>> >> > > The vector path was initially chosen in RTE_INIT, however this
>> >> > > is no longer suitable as we cannot check the max SIMD bitwidth
>> >> > > at that
>> >time.
>> >> > > The default chosen in RTE_INIT is now scalar. For best
>> >> > > performance and to use vector paths, apps must explicitly call
>> >> > > the set algorithm function before using other functions from
>> >> > > this library, as this is where vector handlers are now chosen.
>> >> >
>> >> > [DC] Has it been decided that it is ok to now require
>> >> > applications to pick the CRC algorithm they want to use?
>> >> >
>> >> > An application which previously automatically got SSE4.2 CRC, for
>> >> > example, will now automatically only get scalar.
>> >> >
>> >> > If this is ok, this should probably be called out explicitly in
>> >> > release notes as it may not be Immediately noticeable to users
>> >> > that they now need to select the CRC algo.
>> >> >
>> >> > Actually, in general, the release notes need to be updated for
>> >> > this
>> >> patchset.
>> >>
>> >> The decision to move rte_set_alg() out of RTE_INIT was taken to
>> >> avoid check on max_simd_bitwidth in data path for every single time
>> >> when
>> >> crc_calc() api is invoked. Based on my understanding,
>> >> max_simd_bitwidth is set after eal init, and when used in
>> >> crc_calc(), it might override the default crc algo set during
>> >> RTE_INIT. Therefore, to avoid extra check on max_simd_bitwidth in
>> >> data path,  better option will be to use this static configuration
>> >> one time after eal init in the set_algo
>> >API.
>> >
>> >[DC] Yes that is a good change to have made to avoid extra datapath
>checks.
>> >
>> >Based on off-list discussion, I now also know the reason behind now
>> >defaulting to scalar CRC in RTE_INIT. If a higher bitwidth CRC was
>> >chosen by RTE_INIT (e.g.
>> >SSE4.2 CRC) but the max_simd_bitwidth was then set to RTE_NO_SIMD
>> >(64) through the EAL parameter or call to
>> >rte_set_max_simd_bitwidth(), then there is a mismatch if
>> >rte_net_crc_set_alg() is not then called to reconfigure the CRC.
>> >Defaulting to scalar avoids this mismatch and works on all archs
>> >
>> >As I mentioned before, I think this needs to be called out in release
>> >notes, as it's an under-the-hood change which could cause app
>> >performance to drop if app developers aren't aware of it - the API
>> >itself hasn't changed, so they may not read the doxygen :)
>> >
>>
>> Yes that is a good point, I can add to the release notes for this to call it out.
>
>I don't think it is a good idea to have the scalar crc by default.
>To me, the fastest available CRC has to be enabled by default.
>
>I understand the technical reason why you did it like this however: the SIMD
>bitwidth may not be known at the time the
>RTE_INIT(rte_net_crc_init) function is called.
>
>A simple approach to solve this issue would be to initialize the
>rte_net_crc_handler pointer to a handlers_default. The first time a crc is
>called, the rte_crc32_*_default_handler() function would check the
>configured SIMD bitwidth, and set the handler to the correct one, to avoid to
>do the test for next time.

Thanks for this suggestion, will try this for the next version, it seems it will work quite well, thanks.

>This approach still does not solve the case where the SIMD bitwidth is
>modified during the life of the application. In this case, a callback would have
>to be registered to notify SIMD bitwidth changes... but I don't think it is worth
>to do it. Instead, it can be documented that
>rte_set_max_simd_bitwidth() has to be called early, before rte_eal_init().
>

Yes, it is documented in the Doxygen comment for the rte_set_max_simd_bitwidth() function
that it should be called early, as you mentioned.

<snip>

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-07 10:47       ` Power, Ciara
  2020-10-07 10:52         ` Bruce Richardson
@ 2020-10-07 11:18         ` Olivier Matz
  2020-10-08  9:25           ` Power, Ciara
  1 sibling, 1 reply; 276+ messages in thread
From: Olivier Matz @ 2020-10-07 11:18 UTC (permalink / raw)
  To: Power, Ciara; +Cc: dev, Ray Kinsella, Neil Horman, Richardson, Bruce

Hi Ciara,

On Wed, Oct 07, 2020 at 10:47:34AM +0000, Power, Ciara wrote:
> Hi Olivier,
> 
> Thanks for reviewing, some comments below.
> 
> 
> >-----Original Message-----
> >From: Olivier Matz <olivier.matz@6wind.com>
> >Sent: Tuesday 6 October 2020 10:32
> >To: Power, Ciara <ciara.power@intel.com>
> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
> ><nhorman@tuxdriver.com>
> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
> >
> >Hi Ciara,
> >
> >Please find some comments below.
> >
> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
> >> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> >> for an app to set this value. It can also be set using EAL argument
> >> --force-max-simd-bitwidth, which will lock the value and override any
> >> modifications made by the app.
> >>
> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> >>
> >> ---
> >> v3:
> >>   - Added enum value to essentially disable using max SIMD to choose
> >>     paths, intended for use by ARM SVE.
> >>   - Fixed parsing bitwidth argument to return an error for values
> >>     greater than uint16_t.
> >> v2: Added to Doxygen comment for API.
> >> ---
> 
> <snip>
> 
> >>
> >> +uint16_t
> >> +rte_get_max_simd_bitwidth(void)
> >> +{
> >> +	const struct internal_config *internal_conf =
> >> +		eal_get_internal_configuration();
> >> +	return internal_conf->max_simd_bitwidth.bitwidth;
> >> +}
> >
> >Should the return value be enum rte_max_simd_t?
> >If not, do we really need the enum definition?
> >
> 
> I kept the return value and param value below as uint16_t to allow for arbitrary values,
> and will allow it be more flexible for future additions as new enums won't need to be added.
> For the set function below, this is used when a user passes the EAL command line flag, which
> passes an integer value rather than an enum one.
> The enums are useful when checking the max_simd_bitwidth in drivers/libs, for example using
> "RTE_MAX_256_SIMD" instead of "256" in the condition checks.
> 
> >> +
> >> +int
> >> +rte_set_max_simd_bitwidth(uint16_t bitwidth) {
> >> +	struct internal_config *internal_conf =
> >> +		eal_get_internal_configuration();
> >> +	if (internal_conf->max_simd_bitwidth.locked) {
> >> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user
> >runtime override enabled");
> >> +		return -EPERM;
> >> +	}
> >> +
> >> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth <
> >RTE_NO_SIMD ||
> >> +			!rte_is_power_of_2(bitwidth))) {
> >> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> >> +		return -EINVAL;
> >> +	}
> >> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> >> +	return 0;
> >> +}
> >
> >Same question, should the parameter be enum rte_max_simd_t?
> >
> 
> <snip>
> 
> >> +enum rte_max_simd_t {
> >> +	RTE_NO_SIMD = 64,
> >> +	RTE_MAX_128_SIMD = 128,
> >> +	RTE_MAX_256_SIMD = 256,
> >> +	RTE_MAX_512_SIMD = 512,
> >> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> >> +};
> >
> >What is the difference between RTE_NO_SIMD and
> >RTE_MAX_SIMD_DISABLE?
> 
> RTE_NO_SIMD has value 64 to limit paths to scalar only.
> RTE_MAX_SIMD_DISABLE sets the highest value possible,
> so essentially disables the limit affecting which vector paths are taken.
> This disable option was added to allow for ARM SVE which will be later added,
> Discussed with Honnappa on a previous version: https://patchwork.dpdk.org/patch/76097/ 

Ok, so RTE_MAX_SIMD_DISABLE means "disable the max limit", right?

I feel the name is a bit confusing. What about something like this:

enum rte_simd {
	RTE_SIMD_DISABLED = 0,
	RTE_SIMD_128 = 128,
	RTE_SIMD_256 = 256,
	RTE_SIMD_512 = 512,
	RTE_SIMD_MAX = UINT16_MAX,
};


> 
> >The default value in internal_config is 0, so in my understanding
> >rte_get_max_simd_bitwidth() will return 0 if --force-max-simd-bitwidth is
> >not passed. Is it expected?
> >
> >Maybe I'm missing something, but I don't understand why the value in
> >internal_config is not set to the maximum supported SIMD bitwidth by
> >default, and optionally overriden by the command line argument, or by the
> >API.
> >
> 
> The default value for max_simd_bitwidth is set depending on the architecture, 256 for x86/ppc,
> and UINT16_MAX for ARM. So for example the default on x86 allows for AVX2 and under.
> The defaults can be seen in patch 2: https://patchwork.dpdk.org/patch/79339/ 

Ok, I was expecting to have a runtime check for this. For instance, on
Intel architecture, it is not known at compilation time; it depends on the
target, which can support up to AVX, AVX2, or AVX512.

> 
> <snip>
> 
> Thanks,
> Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-07 11:18         ` Olivier Matz
@ 2020-10-08  9:25           ` Power, Ciara
  2020-10-08 10:04             ` Olivier Matz
  0 siblings, 1 reply; 276+ messages in thread
From: Power, Ciara @ 2020-10-08  9:25 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev, Ray Kinsella, Neil Horman, Richardson, Bruce

Hi Olivier,


>-----Original Message-----
>From: Olivier Matz <olivier.matz@6wind.com>
>Sent: Wednesday 7 October 2020 12:18
>To: Power, Ciara <ciara.power@intel.com>
>Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
>Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>
>Hi Ciara,
>
>On Wed, Oct 07, 2020 at 10:47:34AM +0000, Power, Ciara wrote:
>> Hi Olivier,
>>
>> Thanks for reviewing, some comments below.
>>
>>
>> >-----Original Message-----
>> >From: Olivier Matz <olivier.matz@6wind.com>
>> >Sent: Tuesday 6 October 2020 10:32
>> >To: Power, Ciara <ciara.power@intel.com>
>> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>> ><nhorman@tuxdriver.com>
>> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>> >
>> >Hi Ciara,
>> >
>> >Please find some comments below.
>> >
>> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
>> >> This patch adds a max SIMD bitwidth EAL configuration. The API
>> >> allows for an app to set this value. It can also be set using EAL
>> >> argument --force-max-simd-bitwidth, which will lock the value and
>> >> override any modifications made by the app.
>> >>
>> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
>> >>
>> >> ---
>> >> v3:
>> >>   - Added enum value to essentially disable using max SIMD to choose
>> >>     paths, intended for use by ARM SVE.
>> >>   - Fixed parsing bitwidth argument to return an error for values
>> >>     greater than uint16_t.
>> >> v2: Added to Doxygen comment for API.
>> >> ---
>>
>> <snip>
>>
>> >>
>> >> +uint16_t
>> >> +rte_get_max_simd_bitwidth(void)
>> >> +{
>> >> +	const struct internal_config *internal_conf =
>> >> +		eal_get_internal_configuration();
>> >> +	return internal_conf->max_simd_bitwidth.bitwidth;
>> >> +}
>> >
>> >Should the return value be enum rte_max_simd_t?
>> >If not, do we really need the enum definition?
>> >
>>
>> I kept the return value and param value below as uint16_t to allow for
>> arbitrary values, and will allow it be more flexible for future additions as
>new enums won't need to be added.
>> For the set function below, this is used when a user passes the EAL
>> command line flag, which passes an integer value rather than an enum one.
>> The enums are useful when checking the max_simd_bitwidth in
>> drivers/libs, for example using "RTE_MAX_256_SIMD" instead of "256" in
>the condition checks.
>>
>> >> +
>> >> +int
>> >> +rte_set_max_simd_bitwidth(uint16_t bitwidth) {
>> >> +	struct internal_config *internal_conf =
>> >> +		eal_get_internal_configuration();
>> >> +	if (internal_conf->max_simd_bitwidth.locked) {
>> >> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user
>> >runtime override enabled");
>> >> +		return -EPERM;
>> >> +	}
>> >> +
>> >> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth <
>> >RTE_NO_SIMD ||
>> >> +			!rte_is_power_of_2(bitwidth))) {
>> >> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
>> >> +		return -EINVAL;
>> >> +	}
>> >> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
>> >> +	return 0;
>> >> +}
>> >
>> >Same question, should the parameter be enum rte_max_simd_t?
>> >
>>
>> <snip>
>>
>> >> +enum rte_max_simd_t {
>> >> +	RTE_NO_SIMD = 64,
>> >> +	RTE_MAX_128_SIMD = 128,
>> >> +	RTE_MAX_256_SIMD = 256,
>> >> +	RTE_MAX_512_SIMD = 512,
>> >> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX, };
>> >
>> >What is the difference between RTE_NO_SIMD and
>RTE_MAX_SIMD_DISABLE?
>>
>> RTE_NO_SIMD has value 64 to limit paths to scalar only.
>> RTE_MAX_SIMD_DISABLE sets the highest value possible, so essentially
>> disables the limit affecting which vector paths are taken.
>> This disable option was added to allow for ARM SVE which will be later
>> added, Discussed with Honnappa on a previous version:
>> https://patchwork.dpdk.org/patch/76097/
>
>Ok, so RTE_MAX_SIMD_DISABLE means "disable the max limit", right?
>
>I feel the name is a bit confusing. What about something like this:
>
>enum rte_simd {
>	RTE_SIMD_DISABLED = 0,
>	RTE_SIMD_128 = 128,
>	RTE_SIMD_256 = 256,
>	RTE_SIMD_512 = 512,
>	RTE_SIMD_MAX = UINT16_MAX,
>};
>
>

Sure, I can rename these, although I will implement it with RTE_SIMD_DISABLED=64 to allow for the scalar path only.

>>
>> >The default value in internal_config is 0, so in my understanding
>> >rte_get_max_simd_bitwidth() will return 0 if
>> >--force-max-simd-bitwidth is not passed. Is it expected?
>> >
>> >Maybe I'm missing something, but I don't understand why the value in
>> >internal_config is not set to the maximum supported SIMD bitwidth by
>> >default, and optionally overriden by the command line argument, or by
>> >the API.
>> >
>>
>> The default value for max_simd_bitwidth is set depending on the
>> architecture, 256 for x86/ppc, and UINT16_MAX for ARM. So for example
>the default on x86 allows for AVX2 and under.
>> The defaults can be seen in patch 2:
>> https://patchwork.dpdk.org/patch/79339/
>
>Ok, I was expecting to have a runtime check for this. For instance, on intel
>architecture, it is not known at compilation, it depends on the target which
>can support up to AVX, AVX2, or AVX512.
>

Yes, the actual support will vary, but this max SIMD bitwidth is only an upper limit on what paths can be taken.
So for example with x86 default at 256, the path will still be chosen based on what the target can support, but it must be AVX2 or a lesser path. 
This allows for AVX512 to be enabled at runtime, by increasing the max SIMD bitwidth to 512, allowing for that path to be taken where supported.

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08  9:25           ` Power, Ciara
@ 2020-10-08 10:04             ` Olivier Matz
  2020-10-08 10:58               ` Power, Ciara
  0 siblings, 1 reply; 276+ messages in thread
From: Olivier Matz @ 2020-10-08 10:04 UTC (permalink / raw)
  To: Power, Ciara; +Cc: dev, Ray Kinsella, Neil Horman, Richardson, Bruce

Hi Ciara,

On Thu, Oct 08, 2020 at 09:25:42AM +0000, Power, Ciara wrote:
> Hi Olivier,
> 
> 
> >-----Original Message-----
> >From: Olivier Matz <olivier.matz@6wind.com>
> >Sent: Wednesday 7 October 2020 12:18
> >To: Power, Ciara <ciara.power@intel.com>
> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
> ><nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
> >
> >Hi Ciara,
> >
> >On Wed, Oct 07, 2020 at 10:47:34AM +0000, Power, Ciara wrote:
> >> Hi Olivier,
> >>
> >> Thanks for reviewing, some comments below.
> >>
> >>
> >> >-----Original Message-----
> >> >From: Olivier Matz <olivier.matz@6wind.com>
> >> >Sent: Tuesday 6 October 2020 10:32
> >> >To: Power, Ciara <ciara.power@intel.com>
> >> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
> >> ><nhorman@tuxdriver.com>
> >> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
> >> >
> >> >Hi Ciara,
> >> >
> >> >Please find some comments below.
> >> >
> >> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
> >> >> This patch adds a max SIMD bitwidth EAL configuration. The API
> >> >> allows for an app to set this value. It can also be set using EAL
> >> >> argument --force-max-simd-bitwidth, which will lock the value and
> >> >> override any modifications made by the app.
> >> >>
> >> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> >> >>
> >> >> ---
> >> >> v3:
> >> >>   - Added enum value to essentially disable using max SIMD to choose
> >> >>     paths, intended for use by ARM SVE.
> >> >>   - Fixed parsing bitwidth argument to return an error for values
> >> >>     greater than uint16_t.
> >> >> v2: Added to Doxygen comment for API.
> >> >> ---
> >>
> >> <snip>
> >>
> >> >>
> >> >> +uint16_t
> >> >> +rte_get_max_simd_bitwidth(void)
> >> >> +{
> >> >> +	const struct internal_config *internal_conf =
> >> >> +		eal_get_internal_configuration();
> >> >> +	return internal_conf->max_simd_bitwidth.bitwidth;
> >> >> +}
> >> >
> >> >Should the return value be enum rte_max_simd_t?
> >> >If not, do we really need the enum definition?
> >> >
> >>
> >> I kept the return value and param value below as uint16_t to allow for
> >> arbitrary values, and will allow it be more flexible for future additions as
> >new enums won't need to be added.
> >> For the set function below, this is used when a user passes the EAL
> >> command line flag, which passes an integer value rather than an enum one.
> >> The enums are useful when checking the max_simd_bitwidth in
> >> drivers/libs, for example using "RTE_MAX_256_SIMD" instead of "256" in
> >the condition checks.
> >>
> >> >> +
> >> >> +int
> >> >> +rte_set_max_simd_bitwidth(uint16_t bitwidth) {
> >> >> +	struct internal_config *internal_conf =
> >> >> +		eal_get_internal_configuration();
> >> >> +	if (internal_conf->max_simd_bitwidth.locked) {
> >> >> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user
> >> >runtime override enabled");
> >> >> +		return -EPERM;
> >> >> +	}
> >> >> +
> >> >> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth <
> >> >RTE_NO_SIMD ||
> >> >> +			!rte_is_power_of_2(bitwidth))) {
> >> >> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> >> >> +		return -EINVAL;
> >> >> +	}
> >> >> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> >> >> +	return 0;
> >> >> +}
> >> >
> >> >Same question, should the parameter be enum rte_max_simd_t?
> >> >
> >>
> >> <snip>
> >>
> >> >> +enum rte_max_simd_t {
> >> >> +	RTE_NO_SIMD = 64,
> >> >> +	RTE_MAX_128_SIMD = 128,
> >> >> +	RTE_MAX_256_SIMD = 256,
> >> >> +	RTE_MAX_512_SIMD = 512,
> >> >> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX, };
> >> >
> >> >What is the difference between RTE_NO_SIMD and
> >RTE_MAX_SIMD_DISABLE?
> >>
> >> RTE_NO_SIMD has value 64 to limit paths to scalar only.
> >> RTE_MAX_SIMD_DISABLE sets the highest value possible, so essentially
> >> disables the limit affecting which vector paths are taken.
> >> This disable option was added to allow for ARM SVE which will be later
> >> added, Discussed with Honnappa on a previous version:
> >> https://patchwork.dpdk.org/patch/76097/
> >
> >Ok, so RTE_MAX_SIMD_DISABLE means "disable the max limit", right?
> >
> >I feel the name is a bit confusing. What about something like this:
> >
> >enum rte_simd {
> >	RTE_SIMD_DISABLED = 0,
> >	RTE_SIMD_128 = 128,
> >	RTE_SIMD_256 = 256,
> >	RTE_SIMD_512 = 512,
> >	RTE_SIMD_MAX = UINT16_MAX,
> >};
> >
> >
> 
> Sure, I can rename these. Although will implement with RTE_SIMD_DISABLED=64 to allow for scalar path only.

Out of curiosity, why 64? I thought 0 was a good value for "disabled".

> >>
> >> >The default value in internal_config is 0, so in my understanding
> >> >rte_get_max_simd_bitwidth() will return 0 if
> >> >--force-max-simd-bitwidth is not passed. Is it expected?
> >> >
> >> >Maybe I'm missing something, but I don't understand why the value in
> >> >internal_config is not set to the maximum supported SIMD bitwidth by
> >> >default, and optionally overriden by the command line argument, or by
> >> >the API.
> >> >
> >>
> >> The default value for max_simd_bitwidth is set depending on the
> >> architecture, 256 for x86/ppc, and UINT16_MAX for ARM. So for example
> >the default on x86 allows for AVX2 and under.
> >> The defaults can be seen in patch 2:
> >> https://patchwork.dpdk.org/patch/79339/
> >
> >Ok, I was expecting to have a runtime check for this. For instance, on intel
> >architecture, it is not known at compilation, it depends on the target which
> >can support up to AVX, AVX2, or AVX512.
> >
> 
> Yes, the actual support will vary, but this max SIMD bitwidth is only an upper limit on what paths can be taken.
> So for example with x86 default at 256, the path will still be chosen based on what the target can support, but it must be AVX2 or a lesser path. 
> This allows for AVX512 to be enabled at runtime, by increasing the max SIMD bitwidth to 512, allowing for that path to be taken where supported.

Ah, this means that AVX512 won't be enabled by default on machines that
support it? Is there a reason for that?

Another question: if the default value for max-simd-bitwidth is 256 on
Intel, and we are running on a target that does not support AVX2, will
the value be updated to 128 at initialization? In other words, is it
still up to the dpdk libraries doing vector code to check the
availability of vector instructions?

Thanks,
Olivier

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 10:04             ` Olivier Matz
@ 2020-10-08 10:58               ` Power, Ciara
  2020-10-08 11:48                 ` Bruce Richardson
  0 siblings, 1 reply; 276+ messages in thread
From: Power, Ciara @ 2020-10-08 10:58 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev, Ray Kinsella, Neil Horman, Richardson, Bruce

Hi Olivier,

 
>-----Original Message-----
>From: Olivier Matz <olivier.matz@6wind.com>
>Sent: Thursday 8 October 2020 11:04
>To: Power, Ciara <ciara.power@intel.com>
>Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
>Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>
>Hi Ciara,
>
>On Thu, Oct 08, 2020 at 09:25:42AM +0000, Power, Ciara wrote:
>> Hi Olivier,
>>
>>
>> >-----Original Message-----
>> >From: Olivier Matz <olivier.matz@6wind.com>
>> >Sent: Wednesday 7 October 2020 12:18
>> >To: Power, Ciara <ciara.power@intel.com>
>> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>> ><nhorman@tuxdriver.com>; Richardson, Bruce
>> ><bruce.richardson@intel.com>
>> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>> >
>> >Hi Ciara,
>> >
>> >On Wed, Oct 07, 2020 at 10:47:34AM +0000, Power, Ciara wrote:
>> >> Hi Olivier,
>> >>
>> >> Thanks for reviewing, some comments below.
>> >>
>> >>
>> >> >-----Original Message-----
>> >> >From: Olivier Matz <olivier.matz@6wind.com>
>> >> >Sent: Tuesday 6 October 2020 10:32
>> >> >To: Power, Ciara <ciara.power@intel.com>
>> >> >Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>> >> ><nhorman@tuxdriver.com>
>> >> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD
>> >> >bitwidth
>> >> >
>> >> >Hi Ciara,
>> >> >
>> >> >Please find some comments below.
>> >> >
>> >> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
>> >> >> This patch adds a max SIMD bitwidth EAL configuration. The API
>> >> >> allows for an app to set this value. It can also be set using
>> >> >> EAL argument --force-max-simd-bitwidth, which will lock the
>> >> >> value and override any modifications made by the app.
>> >> >>
<snip>
>> >>
>> >> >> +enum rte_max_simd_t {
>> >> >> +	RTE_NO_SIMD = 64,
>> >> >> +	RTE_MAX_128_SIMD = 128,
>> >> >> +	RTE_MAX_256_SIMD = 256,
>> >> >> +	RTE_MAX_512_SIMD = 512,
>> >> >> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX, };
>> >> >
>> >> >What is the difference between RTE_NO_SIMD and
>> >RTE_MAX_SIMD_DISABLE?
>> >>
>> >> RTE_NO_SIMD has value 64 to limit paths to scalar only.
>> >> RTE_MAX_SIMD_DISABLE sets the highest value possible, so
>> >> essentially disables the limit affecting which vector paths are taken.
>> >> This disable option was added to allow for ARM SVE which will be
>> >> later added, Discussed with Honnappa on a previous version:
>> >> https://patchwork.dpdk.org/patch/76097/
>> >
>> >Ok, so RTE_MAX_SIMD_DISABLE means "disable the max limit", right?
>> >
>> >I feel the name is a bit confusing. What about something like this:
>> >
>> >enum rte_simd {
>> >	RTE_SIMD_DISABLED = 0,
>> >	RTE_SIMD_128 = 128,
>> >	RTE_SIMD_256 = 256,
>> >	RTE_SIMD_512 = 512,
>> >	RTE_SIMD_MAX = UINT16_MAX,
>> >};
>> >
>> >
>>
>> Sure, I can rename these. Although will implement with
>RTE_SIMD_DISABLED=64 to allow for scalar path only.
>
>Out of curiosity, why 64? I thought 0 was a good value for "disabled".
>

64 was chosen because it represents the max bitwidth for the scalar path, 64 bits.
Currently, we use 0 on the command line to represent RTE_SIMD_MAX = UINT16_MAX,
as it is more user friendly to pass "--force-max-simd-bitwidth=0" than a max value. The
0 is then internally converted to the max value option. This would not be possible if
the scalar option had the value 0.

>> >>
>> >> >The default value in internal_config is 0, so in my understanding
>> >> >rte_get_max_simd_bitwidth() will return 0 if
>> >> >--force-max-simd-bitwidth is not passed. Is it expected?
>> >> >
>> >> >Maybe I'm missing something, but I don't understand why the value
>> >> >in internal_config is not set to the maximum supported SIMD
>> >> >bitwidth by default, and optionally overriden by the command line
>> >> >argument, or by the API.
>> >> >
>> >>
>> >> The default value for max_simd_bitwidth is set depending on the
>> >> architecture, 256 for x86/ppc, and UINT16_MAX for ARM. So for
>> >> example
>> >the default on x86 allows for AVX2 and under.
>> >> The defaults can be seen in patch 2:
>> >> https://patchwork.dpdk.org/patch/79339/
>> >
>> >Ok, I was expecting to have a runtime check for this. For instance,
>> >on intel architecture, it is not known at compilation, it depends on
>> >the target which can support up to AVX, AVX2, or AVX512.
>> >
>>
>> Yes, the actual support will vary, but this max SIMD bitwidth is only an
>upper limit on what paths can be taken.
>> So for example with x86 default at 256, the path will still be chosen based
>on what the target can support, but it must be AVX2 or a lesser path.
>> This allows for AVX512 to be enabled at runtime, by increasing the max
>SIMD bitwidth to 512, allowing for that path to be taken where supported.
>
>Ah, this means that AVX512 won't be enabled by default on machine that
>support it? Is there a reason for that?
>

We can't enable AVX512 by default because it can cause CPU frequency slowdowns.
However, this will allow the app/user to enable that path at runtime, if they find it is the best choice for their use,
by setting the max SIMD bitwidth to 512.

>Another question: if the default value for max-simd-bitwidth is 256 on Intel,
>and we are running on a target that does not support AVX2, will the value be
>updated to 128 at initialization? In other word, is it still up to the dpdk
>libraries doing vector code to check the availability of vector instructions?
>
>Thanks,
>Olivier

No, the value is not updated depending on the support; it is just a limit.
Libraries still do the checks they did previously to determine what is supported, and as long as
that supported path is within the max SIMD bitwidth limit, it is okay to go ahead;
otherwise the library will need to choose a lesser path.
For example, if a library supports AVX2, SSE and a scalar path, but the max SIMD bitwidth is set at 128 by app/user,
although the library supports AVX2, it will be limited to choosing the SSE path.
Whereas if for example a library supports only SSE and a scalar path, and the default max SIMD bitwidth is used (256),
the library can still choose SSE as it is below the 256 bit limit, and the limit remains at 256.

Thanks,
Ciara



^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 10:58               ` Power, Ciara
@ 2020-10-08 11:48                 ` Bruce Richardson
  2020-10-08 13:03                   ` Olivier Matz
  0 siblings, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-10-08 11:48 UTC (permalink / raw)
  To: Power, Ciara; +Cc: Olivier Matz, dev, Ray Kinsella, Neil Horman

On Thu, Oct 08, 2020 at 11:58:08AM +0100, Power, Ciara wrote:
> Hi Olivier,
> 
> 
> >-----Original Message----- From: Olivier Matz <olivier.matz@6wind.com>
> >Sent: Thursday 8 October 2020 11:04 To: Power, Ciara
> ><ciara.power@intel.com> Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>;
> >Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce
> ><bruce.richardson@intel.com> Subject: Re: [dpdk-dev] [PATCH v3 01/18]
> >eal: add max SIMD bitwidth
> >
> >Hi Ciara,
> >
> >On Thu, Oct 08, 2020 at 09:25:42AM +0000, Power, Ciara wrote:
> >> Hi Olivier,
> >>
> >>
> >> >-----Original Message----- From: Olivier Matz
> >> ><olivier.matz@6wind.com> Sent: Wednesday 7 October 2020 12:18 To:
> >> >Power, Ciara <ciara.power@intel.com> Cc: dev@dpdk.org; Ray Kinsella
> >> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> >> >Bruce <bruce.richardson@intel.com> Subject: Re: [dpdk-dev] [PATCH v3
> >> >01/18] eal: add max SIMD bitwidth
> >> >
> >> >Hi Ciara,
> >> >
> >> >On Wed, Oct 07, 2020 at 10:47:34AM +0000, Power, Ciara wrote:
> >> >> Hi Olivier,
> >> >>
> >> >> Thanks for reviewing, some comments below.
> >> >>
> >> >>
> >> >> >-----Original Message----- From: Olivier Matz
> >> >> ><olivier.matz@6wind.com> Sent: Tuesday 6 October 2020 10:32 To:
> >> >> >Power, Ciara <ciara.power@intel.com> Cc: dev@dpdk.org; Ray
> >> >> >Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
> >> >> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD
> >> >> >bitwidth
> >> >> >
> >> >> >Hi Ciara,
> >> >> >
> >> >> >Please find some comments below.
> >> >> >
> >> >> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
> >> >> >> This patch adds a max SIMD bitwidth EAL configuration. The API
> >> >> >> allows for an app to set this value. It can also be set using
> >> >> >> EAL argument --force-max-simd-bitwidth, which will lock the
> >> >> >> value and override any modifications made by the app.
> >> >> >>
> <snip>
> >> >>
> >> >> >> +enum rte_max_simd_t { +RTE_NO_SIMD = 64, +RTE_MAX_128_SIMD =
> >> >> >> 128, +RTE_MAX_256_SIMD = 256, +RTE_MAX_512_SIMD = 512,
> >> >> >> +RTE_MAX_SIMD_DISABLE = UINT16_MAX, };
> >> >> >
> >> >> >What is the difference between RTE_NO_SIMD and
> >> >RTE_MAX_SIMD_DISABLE?
> >> >>
> >> >> RTE_NO_SIMD has value 64 to limit paths to scalar only.
> >> >> RTE_MAX_SIMD_DISABLE sets the highest value possible, so
> >> >> essentially disables the limit affecting which vector paths are
> >> >> taken.  This disable option was added to allow for ARM SVE which
> >> >> will be later added, Discussed with Honnappa on a previous version:
> >> >> https://patchwork.dpdk.org/patch/76097/
> >> >
> >> >Ok, so RTE_MAX_SIMD_DISABLE means "disable the max limit", right?
> >> >
> >> >I feel the name is a bit confusing. What about something like this:
> >> >
> >> >enum rte_simd { RTE_SIMD_DISABLED = 0, RTE_SIMD_128 = 128,
> >> >RTE_SIMD_256 = 256, RTE_SIMD_512 = 512, RTE_SIMD_MAX = UINT16_MAX, };
> >> >
> >> >
> >>
> >> Sure, I can rename these. Although will implement with
> >RTE_SIMD_DISABLED=64 to allow for scalar path only.
> >
> >Out of curiosity, why 64? I thought 0 was a good value for "disabled".
> >
> 
> 64 was chosen because it represents the max bitwidth for the scalar path,
> 64 bits.  Currently, we use 0 on the command-line to represent the
> RTE_SIMD_MAX = UINT16_MAX, as it is more user friendly to pass
> "--force-max-simd-bitwidth=0" rather than a max value, the 0 is then
> internally converted to the max value option. This would not be possible
> if we have the scalar option as 0 value.
> 
> >> >>
> >> >> >The default value in internal_config is 0, so in my understanding
> >> >> >rte_get_max_simd_bitwidth() will return 0 if
> >> >> >--force-max-simd-bitwidth is not passed. Is it expected?
> >> >> >
> >> >> >Maybe I'm missing something, but I don't understand why the value
> >> >> >in internal_config is not set to the maximum supported SIMD
> >> >> >bitwidth by default, and optionally overriden by the command line
> >> >> >argument, or by the API.
> >> >> >
> >> >>
> >> >> The default value for max_simd_bitwidth is set depending on the
> >> >> architecture, 256 for x86/ppc, and UINT16_MAX for ARM. So for
> >> >> example
> >> >the default on x86 allows for AVX2 and under.
> >> >> The defaults can be seen in patch 2:
> >> >> https://patchwork.dpdk.org/patch/79339/
> >> >
> >> >Ok, I was expecting to have a runtime check for this. For instance,
> >> >on intel architecture, it is not known at compilation, it depends on
> >> >the target which can support up to AVX, AVX2, or AVX512.
> >> >
> >>
> >> Yes, the actual support will vary, but this max SIMD bitwidth is only
> >> an
> >upper limit on what paths can be taken.
> >> So for example with x86 default at 256, the path will still be chosen
> >> based
> >on what the target can support, but it must be AVX2 or a lesser path.
> >> This allows for AVX512 to be enabled at runtime, by increasing the max
> >SIMD bitwidth to 512, allowing for that path to be taken where
> >supported.
> >
> >Ah, this means that AVX512 won't be enabled by default on machine that
> >support it? Is there a reason for that?
> >
> 
> We can't enable the AVX512 by default because it can cause CPU frequency
> slowdowns, But this will allow runtime enabling to take that path if the
> app/user finds it is the best choice for their use, by setting the max
> SIMD bitwidth to 512.
> 
> >Another question: if the default value for max-simd-bitwidth is 256 on
> >Intel, and we are running on a target that does not support AVX2, will
> >the value be updated to 128 at initialization? In other word, is it
> >still up to the dpdk libraries doing vector code to check the
> >availability of vector instructions?
> >
> >Thanks, Olivier
> 
> No the value is not updated depending on the support, it is just a limit.
> Libraries still do the checks they had done previously to check what is
> supported, and once that supported path is within the max SIMD bitwidth
> limit, it is okay to go ahead, otherwise it will need to choose a lesser
> path.  For example, if a library supports AVX2, SSE and a scalar path,
> but the max SIMD bitwidth is set at 128 by app/user, although the library
> supports AVX2, it will be limited to choosing the SSE path.  Whereas if
> for example a library supports only SSE and a scalar path, and the
> default max SIMD bitwidth is used (256), the library can still choose SSE
> as it is below the 256 bit limit, and the limit remains at 256.
> 
Just to note too that the reason for keeping this separation is that the
actual code path selection in each library is going to have to be
architecture specific, e.g. to choose SSE or NEON for 128-bit width,
whether or not the max-bitwidth functions take the running system into
account. By leaving the libs/drivers to check the CPU support themselves,
it keeps the max-bitwidth functions generic and saves having
architecture-specific code in two places for this.

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 11:48                 ` Bruce Richardson
@ 2020-10-08 13:03                   ` Olivier Matz
  0 siblings, 0 replies; 276+ messages in thread
From: Olivier Matz @ 2020-10-08 13:03 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: Power, Ciara, dev, Ray Kinsella, Neil Horman

On Thu, Oct 08, 2020 at 12:48:47PM +0100, Bruce Richardson wrote:
> On Thu, Oct 08, 2020 at 11:58:08AM +0100, Power, Ciara wrote:
> > Hi Olivier,
> > 
> > 
> > >-----Original Message----- From: Olivier Matz <olivier.matz@6wind.com>
> > >Sent: Thursday 8 October 2020 11:04 To: Power, Ciara
> > ><ciara.power@intel.com> Cc: dev@dpdk.org; Ray Kinsella <mdr@ashroe.eu>;
> > >Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce
> > ><bruce.richardson@intel.com> Subject: Re: [dpdk-dev] [PATCH v3 01/18]
> > >eal: add max SIMD bitwidth
> > >
> > >Hi Ciara,
> > >
> > >On Thu, Oct 08, 2020 at 09:25:42AM +0000, Power, Ciara wrote:
> > >> Hi Olivier,
> > >>
> > >>
> > >> >-----Original Message----- From: Olivier Matz
> > >> ><olivier.matz@6wind.com> Sent: Wednesday 7 October 2020 12:18 To:
> > >> >Power, Ciara <ciara.power@intel.com> Cc: dev@dpdk.org; Ray Kinsella
> > >> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > >> >Bruce <bruce.richardson@intel.com> Subject: Re: [dpdk-dev] [PATCH v3
> > >> >01/18] eal: add max SIMD bitwidth
> > >> >
> > >> >Hi Ciara,
> > >> >
> > >> >On Wed, Oct 07, 2020 at 10:47:34AM +0000, Power, Ciara wrote:
> > >> >> Hi Olivier,
> > >> >>
> > >> >> Thanks for reviewing, some comments below.
> > >> >>
> > >> >>
> > >> >> >-----Original Message----- From: Olivier Matz
> > >> >> ><olivier.matz@6wind.com> Sent: Tuesday 6 October 2020 10:32 To:
> > >> >> >Power, Ciara <ciara.power@intel.com> Cc: dev@dpdk.org; Ray
> > >> >> >Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
> > >> >> >Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD
> > >> >> >bitwidth
> > >> >> >
> > >> >> >Hi Ciara,
> > >> >> >
> > >> >> >Please find some comments below.
> > >> >> >
> > >> >> >On Wed, Sep 30, 2020 at 02:03:57PM +0100, Ciara Power wrote:
> > >> >> >> This patch adds a max SIMD bitwidth EAL configuration. The API
> > >> >> >> allows for an app to set this value. It can also be set using
> > >> >> >> EAL argument --force-max-simd-bitwidth, which will lock the
> > >> >> >> value and override any modifications made by the app.
> > >> >> >>
> > <snip>
> > >> >>
> > >> >> >> +enum rte_max_simd_t { +RTE_NO_SIMD = 64, +RTE_MAX_128_SIMD =
> > >> >> >> 128, +RTE_MAX_256_SIMD = 256, +RTE_MAX_512_SIMD = 512,
> > >> >> >> +RTE_MAX_SIMD_DISABLE = UINT16_MAX, };
> > >> >> >
> > >> >> >What is the difference between RTE_NO_SIMD and
> > >> >RTE_MAX_SIMD_DISABLE?
> > >> >>
> > >> >> RTE_NO_SIMD has value 64 to limit paths to scalar only.
> > >> >> RTE_MAX_SIMD_DISABLE sets the highest value possible, so
> > >> >> essentially disables the limit affecting which vector paths are
> > >> >> taken.  This disable option was added to allow for ARM SVE which
> > >> >> will be later added, Discussed with Honnappa on a previous version:
> > >> >> https://patchwork.dpdk.org/patch/76097/
> > >> >
> > >> >Ok, so RTE_MAX_SIMD_DISABLE means "disable the max limit", right?
> > >> >
> > >> >I feel the name is a bit confusing. What about something like this:
> > >> >
> > >> >enum rte_simd { RTE_SIMD_DISABLED = 0, RTE_SIMD_128 = 128,
> > >> >RTE_SIMD_256 = 256, RTE_SIMD_512 = 512, RTE_SIMD_MAX = UINT16_MAX, };
> > >> >
> > >> >
> > >>
> > >> Sure, I can rename these. Although will implement with
> > >RTE_SIMD_DISABLED=64 to allow for scalar path only.
> > >
> > >Out of curiosity, why 64? I thought 0 was a good value for "disabled".
> > >
> > 
> > 64 was chosen because it represents the max bitwidth for the scalar path,
> > 64 bits.  Currently, we use 0 on the command-line to represent the
> > RTE_SIMD_MAX = UINT16_MAX, as it is more user friendly to pass
> > "--force-max-simd-bitwidth=0" rather than a max value, the 0 is then
> > internally converted to the max value option. This would not be possible
> > if we have the scalar option as 0 value.
> > 
> > >> >>
> > >> >> >The default value in internal_config is 0, so in my understanding
> > >> >> >rte_get_max_simd_bitwidth() will return 0 if
> > >> >> >--force-max-simd-bitwidth is not passed. Is it expected?
> > >> >> >
> > >> >> >Maybe I'm missing something, but I don't understand why the value
> > >> >> >in internal_config is not set to the maximum supported SIMD
> > >> >> >bitwidth by default, and optionally overriden by the command line
> > >> >> >argument, or by the API.
> > >> >> >
> > >> >>
> > >> >> The default value for max_simd_bitwidth is set depending on the
> > >> >> architecture, 256 for x86/ppc, and UINT16_MAX for ARM. So for
> > >> >> example
> > >> >the default on x86 allows for AVX2 and under.
> > >> >> The defaults can be seen in patch 2:
> > >> >> https://patchwork.dpdk.org/patch/79339/
> > >> >
> > >> >Ok, I was expecting to have a runtime check for this. For instance,
> > >> >on intel architecture, it is not known at compilation, it depends on
> > >> >the target which can support up to AVX, AVX2, or AVX512.
> > >> >
> > >>
> > >> Yes, the actual support will vary, but this max SIMD bitwidth is only
> > >> an
> > >upper limit on what paths can be taken.
> > >> So for example with x86 default at 256, the path will still be chosen
> > >> based
> > >on what the target can support, but it must be AVX2 or a lesser path.
> > >> This allows for AVX512 to be enabled at runtime, by increasing the max
> > >SIMD bitwidth to 512, allowing for that path to be taken where
> > >supported.
> > >
> > >Ah, this means that AVX512 won't be enabled by default on machine that
> > >support it? Is there a reason for that?
> > >
> > 
> > We can't enable the AVX512 by default because it can cause CPU frequency
> > slowdowns, But this will allow runtime enabling to take that path if the
> > app/user finds it is the best choice for their use, by setting the max
> > SIMD bitwidth to 512.
> > 
> > >Another question: if the default value for max-simd-bitwidth is 256 on
> > >Intel, and we are running on a target that does not support AVX2, will
> > >the value be updated to 128 at initialization? In other word, is it
> > >still up to the dpdk libraries doing vector code to check the
> > >availability of vector instructions?
> > >
> > >Thanks, Olivier
> > 
> > No the value is not updated depending on the support, it is just a limit.
> > Libraries still do the checks they had done previously to check what is
> > supported, and once that supported path is within the max SIMD bitwidth
> > limit, it is okay to go ahead, otherwise it will need to choose a lesser
> > path.  For example, if a library supports AVX2, SSE and a scalar path,
> > but the max SIMD bitwidth is set at 128 by app/user, although the library
> > supports AVX2, it will be limited to choosing the SSE path.  Whereas if
> > for example a library supports only SSE and a scalar path, and the
> > default max SIMD bitwidth is used (256), the library can still choose SSE
> > as it is below the 256 bit limit, and the limit remains at 256.
> > 
> Just to note too that the reason for keeping this separation is that the
> actual code path selection in each library is going to have to be
> architecture specific, e.g. to choose SSE or NEON for 128-bit width,
> whether or not the max-bitwidth functions take the running system into
> account. By leaving the libs/drivers to check the CPU support themselves,
> it keeps the max-bitwidth functions generic and saves having
> architecture-specific code in two places for this.

Ok, that's clearer to me, thanks Ciara and Bruce.

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
                       ` (2 preceding siblings ...)
  2020-10-06 11:50     ` Maxime Coquelin
@ 2020-10-08 13:07     ` Ananyev, Konstantin
  2020-10-08 13:14       ` Bruce Richardson
  2020-10-08 13:19     ` Ananyev, Konstantin
  2020-10-08 15:28     ` David Marchand
  5 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 13:07 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Power, Ciara, Ray Kinsella, Neil Horman


> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> for an app to set this value. It can also be set using EAL argument
> --force-max-simd-bitwidth, which will lock the value and override any
> modifications made by the app.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>   - Added enum value to essentially disable using max SIMD to choose
>     paths, intended for use by ARM SVE.
>   - Fixed parsing bitwidth argument to return an error for values
>     greater than uint16_t.
> v2: Added to Doxygen comment for API.
> ---
>  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  5 files changed, 111 insertions(+)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index a5426e1234..e9117a96af 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -102,6 +102,7 @@ eal_long_options[] = {
>  	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>  	{0,                     0, NULL, 0                        }
>  };
> 
> @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
>  	return 0;
>  }
> 
> +static int
> +eal_parse_simd_bitwidth(const char *arg, bool locked)
> +{
> +	char *end;
> +	unsigned long bitwidth;
> +	int ret;
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +
> +	if (arg == NULL || arg[0] == '\0')
> +		return -1;
> +
> +	errno = 0;
> +	bitwidth = strtoul(arg, &end, 0);
> +
> +	/* check for errors */
> +	if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
> +		return -1;
> +
> +	if (bitwidth == 0)
> +		bitwidth = UINT16_MAX;
> +	ret = rte_set_max_simd_bitwidth(bitwidth);
> +	if (ret < 0)
> +		return -1;
> +	internal_conf->max_simd_bitwidth.locked = locked;
> +	return 0;
> +}
> +
>  static int
>  eal_parse_base_virtaddr(const char *arg)
>  {
> @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
>  	case OPT_NO_TELEMETRY_NUM:
>  		conf->no_telemetry = 1;
>  		break;
> +	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> +		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> +			return -1;
> +		}
> +		break;
> 
>  	/* don't know what to do, leave this to caller */
>  	default:
> @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
>  	return 0;
>  }
> 
> +uint16_t
> +rte_get_max_simd_bitwidth(void)
> +{
> +	const struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	return internal_conf->max_simd_bitwidth.bitwidth;
> +}
> +
> +int
> +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> +{
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	if (internal_conf->max_simd_bitwidth.locked) {
> +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> +		return -EPERM;
> +	}
> +
> +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> +			!rte_is_power_of_2(bitwidth))) {
> +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> +		return -EINVAL;
> +	}
> +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> +	return 0;
> +}
> +
>  void
>  eal_common_usage(void)
>  {
> @@ -1981,6 +2044,7 @@ eal_common_usage(void)
>  	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
>  	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
>  	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> +	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
>  	       "\nEAL options for DEBUG use only:\n"
>  	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
>  	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 13f93388a7..367e0cc19e 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -33,6 +33,12 @@ struct hugepage_info {
>  	int lock_descriptor;    /**< file descriptor for hugepage dir */
>  };
> 
> +struct simd_bitwidth {
> +	/**< flag indicating if bitwidth is locked from further modification */
> +	bool locked;
> +	uint16_t bitwidth; /**< bitwidth value */
> +};
> +
>  /**
>   * internal configuration
>   */
> @@ -85,6 +91,8 @@ struct internal_config {
>  	volatile unsigned int init_complete;
>  	/**< indicates whether EAL has completed initialization */
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
> +	/** max simd bitwidth path to use */
> +	struct simd_bitwidth max_simd_bitwidth;
>  };
> 
>  void eal_reset_internal_config(struct internal_config *internal_cfg);
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index 89769d48b4..ef33979664 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -85,6 +85,8 @@ enum {
>  	OPT_TELEMETRY_NUM,
>  #define OPT_NO_TELEMETRY      "no-telemetry"
>  	OPT_NO_TELEMETRY_NUM,
> +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> +	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
>  	OPT_LONG_MAX_NUM
>  };
> 
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..fb739f3474 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>  	RTE_PROC_INVALID
>  };
> 
> +enum rte_max_simd_t {
> +	RTE_NO_SIMD = 64,

While I do understand the idea of having that value from a consistency point of view,
I wonder whether we really need to allow the user to specify values smaller than 128.
At least on x86 we always have 128-bit SIMD enabled, even for -Dmachine=default.
So there seems to be little point in forbidding libraries from using the SSE code path
when the compiler is free to insert SSE instructions of its own accord.

> +	RTE_MAX_128_SIMD = 128,
> +	RTE_MAX_256_SIMD = 256,
> +	RTE_MAX_512_SIMD = 512,
> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,

As a nit, I think it is safe enough to have this last value
(RTE_MAX_SIMD_DISABLE or RTE_MAX_SIMD_MAX) equal to (INT16_MAX + 1).
That would be big enough to probably never hit an actual HW limit,
while still remaining a power of two, like the other values.

> +};
> +
>  /**
>   * Get the process type in a multi-process setup
>   *
> @@ -51,6 +59,31 @@ enum rte_proc_type_t {
>   */
>  enum rte_proc_type_t rte_eal_process_type(void);
> 
> +/**
> + * Get the supported SIMD bitwidth.
> + *
> + * @return
> + *   uint16_t bitwidth.
> + */
> +__rte_experimental
> +uint16_t rte_get_max_simd_bitwidth(void);
> +
> +/**
> + * Set the supported SIMD bitwidth.
> + * This API should only be called once at initialization, before EAL init.
> + *
> + * @param bitwidth
> + *   uint16_t bitwidth.
> + * @return
> + *   0 on success.
> + * @return
> + *   -EINVAL on invalid bitwidth parameter.
> + * @return
> + *   -EPERM if bitwidth is locked.
> + */
> +__rte_experimental
> +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> +
>  /**
>   * Request iopl privilege for all RPL.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index c32461c663..17a7195a3d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -397,6 +397,10 @@ EXPERIMENTAL {
>  	rte_service_lcore_may_be_active;
>  	rte_thread_register;
>  	rte_thread_unregister;
> +
> +	# added in 20.11
> +	rte_get_max_simd_bitwidth;
> +	rte_set_max_simd_bitwidth;
>  };
> 
>  INTERNAL {
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 13:07     ` Ananyev, Konstantin
@ 2020-10-08 13:14       ` Bruce Richardson
  2020-10-08 14:07         ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-10-08 13:14 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: Power, Ciara, dev, Ray Kinsella, Neil Horman

On Thu, Oct 08, 2020 at 01:07:26PM +0000, Ananyev, Konstantin wrote:
> 
> > This patch adds a max SIMD bitwidth EAL configuration. The API allows
> > for an app to set this value. It can also be set using EAL argument
> > --force-max-simd-bitwidth, which will lock the value and override any
> > modifications made by the app.
> > 
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > 
> > ---
> > v3:
> >   - Added enum value to essentially disable using max SIMD to choose
> >     paths, intended for use by ARM SVE.
> >   - Fixed parsing bitwidth argument to return an error for values
> >     greater than uint16_t.
> > v2: Added to Doxygen comment for API.
> > ---
> >  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
> >  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
> >  lib/librte_eal/common/eal_options.h        |  2 +
> >  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
> >  lib/librte_eal/rte_eal_version.map         |  4 ++
> >  5 files changed, 111 insertions(+)
> > 
> > diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> > index a5426e1234..e9117a96af 100644
> > --- a/lib/librte_eal/common/eal_common_options.c
> > +++ b/lib/librte_eal/common/eal_common_options.c
> > @@ -102,6 +102,7 @@ eal_long_options[] = {
> >  	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
> >  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
> >  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> > +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> >  	{0,                     0, NULL, 0                        }
> >  };
> > 
> > @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
> >  	return 0;
> >  }
> > 
> > +static int
> > +eal_parse_simd_bitwidth(const char *arg, bool locked)
> > +{
> > +	char *end;
> > +	unsigned long bitwidth;
> > +	int ret;
> > +	struct internal_config *internal_conf =
> > +		eal_get_internal_configuration();
> > +
> > +	if (arg == NULL || arg[0] == '\0')
> > +		return -1;
> > +
> > +	errno = 0;
> > +	bitwidth = strtoul(arg, &end, 0);
> > +
> > +	/* check for errors */
> > +	if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
> > +		return -1;
> > +
> > +	if (bitwidth == 0)
> > +		bitwidth = UINT16_MAX;
> > +	ret = rte_set_max_simd_bitwidth(bitwidth);
> > +	if (ret < 0)
> > +		return -1;
> > +	internal_conf->max_simd_bitwidth.locked = locked;
> > +	return 0;
> > +}
> > +
> >  static int
> >  eal_parse_base_virtaddr(const char *arg)
> >  {
> > @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
> >  	case OPT_NO_TELEMETRY_NUM:
> >  		conf->no_telemetry = 1;
> >  		break;
> > +	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> > +		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> > +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> > +					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> > +			return -1;
> > +		}
> > +		break;
> > 
> >  	/* don't know what to do, leave this to caller */
> >  	default:
> > @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
> >  	return 0;
> >  }
> > 
> > +uint16_t
> > +rte_get_max_simd_bitwidth(void)
> > +{
> > +	const struct internal_config *internal_conf =
> > +		eal_get_internal_configuration();
> > +	return internal_conf->max_simd_bitwidth.bitwidth;
> > +}
> > +
> > +int
> > +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> > +{
> > +	struct internal_config *internal_conf =
> > +		eal_get_internal_configuration();
> > +	if (internal_conf->max_simd_bitwidth.locked) {
> > +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> > +		return -EPERM;
> > +	}
> > +
> > +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> > +			!rte_is_power_of_2(bitwidth))) {
> > +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> > +		return -EINVAL;
> > +	}
> > +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> > +	return 0;
> > +}
> > +
> >  void
> >  eal_common_usage(void)
> >  {
> > @@ -1981,6 +2044,7 @@ eal_common_usage(void)
> >  	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
> >  	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
> >  	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> > +	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
> >  	       "\nEAL options for DEBUG use only:\n"
> >  	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
> >  	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> > diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> > index 13f93388a7..367e0cc19e 100644
> > --- a/lib/librte_eal/common/eal_internal_cfg.h
> > +++ b/lib/librte_eal/common/eal_internal_cfg.h
> > @@ -33,6 +33,12 @@ struct hugepage_info {
> >  	int lock_descriptor;    /**< file descriptor for hugepage dir */
> >  };
> > 
> > +struct simd_bitwidth {
> > +	/**< flag indicating if bitwidth is locked from further modification */
> > +	bool locked;
> > +	uint16_t bitwidth; /**< bitwidth value */
> > +};
> > +
> >  /**
> >   * internal configuration
> >   */
> > @@ -85,6 +91,8 @@ struct internal_config {
> >  	volatile unsigned int init_complete;
> >  	/**< indicates whether EAL has completed initialization */
> >  	unsigned int no_telemetry; /**< true to disable Telemetry */
> > +	/** max simd bitwidth path to use */
> > +	struct simd_bitwidth max_simd_bitwidth;
> >  };
> > 
> >  void eal_reset_internal_config(struct internal_config *internal_cfg);
> > diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> > index 89769d48b4..ef33979664 100644
> > --- a/lib/librte_eal/common/eal_options.h
> > +++ b/lib/librte_eal/common/eal_options.h
> > @@ -85,6 +85,8 @@ enum {
> >  	OPT_TELEMETRY_NUM,
> >  #define OPT_NO_TELEMETRY      "no-telemetry"
> >  	OPT_NO_TELEMETRY_NUM,
> > +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> > +	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> >  	OPT_LONG_MAX_NUM
> >  };
> > 
> > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> > index ddcf6a2e7a..fb739f3474 100644
> > --- a/lib/librte_eal/include/rte_eal.h
> > +++ b/lib/librte_eal/include/rte_eal.h
> > @@ -43,6 +43,14 @@ enum rte_proc_type_t {
> >  	RTE_PROC_INVALID
> >  };
> > 
> > +enum rte_max_simd_t {
> > +	RTE_NO_SIMD = 64,
> 
> While I do understand the idea of having that value from consistency point of view,
> I wonder do we really need to allow user to specify values smaller then 128.
> At least on x86 we always have 128 bit SIMD enabled, even for -Dmachine=default.
> So seems no much point to forbid libraries using SSE code-path when compiler
> is free to insert SSE instructions on its own will.  
> 

The reason to support this is for testing purposes, as it allows an easy
way for a tester to check out any scalar code paths - which are often
common across architectures.

> > +	RTE_MAX_128_SIMD = 128,
> > +	RTE_MAX_256_SIMD = 256,
> > +	RTE_MAX_512_SIMD = 512,
> > +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> 
> As a nit, I think it is safe enough to have this last value 
> (RTE_MAX_SIMD_DISABLE or RTE_MAX_SIMD_MAX) equal to (INT16_MAX + 1).
> That would be big enough to probably never hit actual HW limit,
> while it still remains power of two, as other values. 
> 

I actually think it's probably clearer as-is, because the fact of the rest
being powers of 2 is irrelevant since we just check greater than or less
than. If we did change it, then we need to put in a comment explaining why
the plus-one, while as it is now it's clearly a placeholder $BIGNUM.

/Bruce

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values Ciara Power
  2020-10-05 19:35     ` David Christensen
@ 2020-10-08 13:17     ` Ananyev, Konstantin
  2020-10-08 16:45     ` David Marchand
  2 siblings, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 13:17 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli,
	David Christensen, Jan Viktorin, Richardson, Bruce


> 
> Each arch has a define for the default SIMD bitwidth value, this is used
> on EAL init to set the config max SIMD bitwidth.
> 
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v3:
>   - Removed unnecessary define in generic rte_vect.h
>   - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
> v2: Changed default bitwidth for Arm to 128.
> ---
>  lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>  lib/librte_eal/common/eal_common_options.c | 3 +++
>  lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>  lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>  4 files changed, 9 insertions(+)
> 
> diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
> index 01c51712a1..a3508e69d5 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -14,6 +14,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH UINT16_MAX

As a nit - can we use here values from enum rte_max_simd_t?
That would make things more consistent...
Probably you'll need to move enum rte_max_simd_t definition
into rte_vect.h for that.

> +
>  typedef int32x4_t xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index e9117a96af..d412cae89b 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -35,6 +35,7 @@
>  #ifndef RTE_EXEC_ENV_WINDOWS
>  #include <rte_telemetry.h>
>  #endif
> +#include <rte_vect.h>
> 
>  #include "eal_internal_cfg.h"
>  #include "eal_options.h"
> @@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
>  	internal_cfg->user_mbuf_pool_ops_name = NULL;
>  	CPU_ZERO(&internal_cfg->ctrl_cpuset);
>  	internal_cfg->init_complete = 0;
> +	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
> +	internal_cfg->max_simd_bitwidth.locked = 0;
>  }
> 
>  static int
> diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
> index b0545c878c..70fbd0c423 100644
> --- a/lib/librte_eal/ppc/include/rte_vect.h
> +++ b/lib/librte_eal/ppc/include/rte_vect.h
> @@ -15,6 +15,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef vector signed int xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
> index df5a607623..b1df75aca7 100644
> --- a/lib/librte_eal/x86/include/rte_vect.h
> +++ b/lib/librte_eal/x86/include/rte_vect.h
> @@ -35,6 +35,8 @@
>  extern "C" {
>  #endif
> 
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef __m128i xmm_t;
> 
>  #define	XMM_SIZE	(sizeof(xmm_t))
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
                       ` (3 preceding siblings ...)
  2020-10-08 13:07     ` Ananyev, Konstantin
@ 2020-10-08 13:19     ` Ananyev, Konstantin
  2020-10-08 15:28     ` David Marchand
  5 siblings, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 13:19 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Power, Ciara, Ray Kinsella, Neil Horman



> 
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..fb739f3474 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>  	RTE_PROC_INVALID
>  };
> 
> +enum rte_max_simd_t {

Just one more nit: why do we need the '_t' suffix here?
Usually the '_t' suffix is reserved for typedefs only.

> +	RTE_NO_SIMD = 64,
> +	RTE_MAX_128_SIMD = 128,
> +	RTE_MAX_256_SIMD = 256,
> +	RTE_MAX_512_SIMD = 512,
> +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> +};
> +

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 13:14       ` Bruce Richardson
@ 2020-10-08 14:07         ` Ananyev, Konstantin
  2020-10-08 14:18           ` Bruce Richardson
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 14:07 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: Power, Ciara, dev, Ray Kinsella, Neil Horman


> On Thu, Oct 08, 2020 at 01:07:26PM +0000, Ananyev, Konstantin wrote:
> >
> > > This patch adds a max SIMD bitwidth EAL configuration. The API allows
> > > for an app to set this value. It can also be set using EAL argument
> > > --force-max-simd-bitwidth, which will lock the value and override any
> > > modifications made by the app.
> > >
> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > >
> > > ---
> > > v3:
> > >   - Added enum value to essentially disable using max SIMD to choose
> > >     paths, intended for use by ARM SVE.
> > >   - Fixed parsing bitwidth argument to return an error for values
> > >     greater than uint16_t.
> > > v2: Added to Doxygen comment for API.
> > > ---
> > >  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
> > >  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
> > >  lib/librte_eal/common/eal_options.h        |  2 +
> > >  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
> > >  lib/librte_eal/rte_eal_version.map         |  4 ++
> > >  5 files changed, 111 insertions(+)
> > >
> > > diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> > > index a5426e1234..e9117a96af 100644
> > > --- a/lib/librte_eal/common/eal_common_options.c
> > > +++ b/lib/librte_eal/common/eal_common_options.c
> > > @@ -102,6 +102,7 @@ eal_long_options[] = {
> > >  	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
> > >  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
> > >  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> > > +	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> > >  	{0,                     0, NULL, 0                        }
> > >  };
> > >
> > > @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
> > >  	return 0;
> > >  }
> > >
> > > +static int
> > > +eal_parse_simd_bitwidth(const char *arg, bool locked)
> > > +{
> > > +	char *end;
> > > +	unsigned long bitwidth;
> > > +	int ret;
> > > +	struct internal_config *internal_conf =
> > > +		eal_get_internal_configuration();
> > > +
> > > +	if (arg == NULL || arg[0] == '\0')
> > > +		return -1;
> > > +
> > > +	errno = 0;
> > > +	bitwidth = strtoul(arg, &end, 0);
> > > +
> > > +	/* check for errors */
> > > +	if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
> > > +		return -1;
> > > +
> > > +	if (bitwidth == 0)
> > > +		bitwidth = UINT16_MAX;
> > > +	ret = rte_set_max_simd_bitwidth(bitwidth);
> > > +	if (ret < 0)
> > > +		return -1;
> > > +	internal_conf->max_simd_bitwidth.locked = locked;
> > > +	return 0;
> > > +}
> > > +
> > >  static int
> > >  eal_parse_base_virtaddr(const char *arg)
> > >  {
> > > @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
> > >  	case OPT_NO_TELEMETRY_NUM:
> > >  		conf->no_telemetry = 1;
> > >  		break;
> > > +	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> > > +		if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> > > +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> > > +					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> > > +			return -1;
> > > +		}
> > > +		break;
> > >
> > >  	/* don't know what to do, leave this to caller */
> > >  	default:
> > > @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
> > >  	return 0;
> > >  }
> > >
> > > +uint16_t
> > > +rte_get_max_simd_bitwidth(void)
> > > +{
> > > +	const struct internal_config *internal_conf =
> > > +		eal_get_internal_configuration();
> > > +	return internal_conf->max_simd_bitwidth.bitwidth;
> > > +}
> > > +
> > > +int
> > > +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> > > +{
> > > +	struct internal_config *internal_conf =
> > > +		eal_get_internal_configuration();
> > > +	if (internal_conf->max_simd_bitwidth.locked) {
> > > +		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> > > +		return -EPERM;
> > > +	}
> > > +
> > > +	if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> > > +			!rte_is_power_of_2(bitwidth))) {
> > > +		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> > > +		return -EINVAL;
> > > +	}
> > > +	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> > > +	return 0;
> > > +}
> > > +
> > >  void
> > >  eal_common_usage(void)
> > >  {
> > > @@ -1981,6 +2044,7 @@ eal_common_usage(void)
> > >  	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
> > >  	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
> > >  	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> > > +	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
> > >  	       "\nEAL options for DEBUG use only:\n"
> > >  	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
> > >  	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> > > diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> > > index 13f93388a7..367e0cc19e 100644
> > > --- a/lib/librte_eal/common/eal_internal_cfg.h
> > > +++ b/lib/librte_eal/common/eal_internal_cfg.h
> > > @@ -33,6 +33,12 @@ struct hugepage_info {
> > >  	int lock_descriptor;    /**< file descriptor for hugepage dir */
> > >  };
> > >
> > > +struct simd_bitwidth {
> > > +	/**< flag indicating if bitwidth is locked from further modification */
> > > +	bool locked;
> > > +	uint16_t bitwidth; /**< bitwidth value */
> > > +};
> > > +
> > >  /**
> > >   * internal configuration
> > >   */
> > > @@ -85,6 +91,8 @@ struct internal_config {
> > >  	volatile unsigned int init_complete;
> > >  	/**< indicates whether EAL has completed initialization */
> > >  	unsigned int no_telemetry; /**< true to disable Telemetry */
> > > +	/** max simd bitwidth path to use */
> > > +	struct simd_bitwidth max_simd_bitwidth;
> > >  };
> > >
> > >  void eal_reset_internal_config(struct internal_config *internal_cfg);
> > > diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> > > index 89769d48b4..ef33979664 100644
> > > --- a/lib/librte_eal/common/eal_options.h
> > > +++ b/lib/librte_eal/common/eal_options.h
> > > @@ -85,6 +85,8 @@ enum {
> > >  	OPT_TELEMETRY_NUM,
> > >  #define OPT_NO_TELEMETRY      "no-telemetry"
> > >  	OPT_NO_TELEMETRY_NUM,
> > > +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> > > +	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> > >  	OPT_LONG_MAX_NUM
> > >  };
> > >
> > > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> > > index ddcf6a2e7a..fb739f3474 100644
> > > --- a/lib/librte_eal/include/rte_eal.h
> > > +++ b/lib/librte_eal/include/rte_eal.h
> > > @@ -43,6 +43,14 @@ enum rte_proc_type_t {
> > >  	RTE_PROC_INVALID
> > >  };
> > >
> > > +enum rte_max_simd_t {
> > > +	RTE_NO_SIMD = 64,
> >
> > While I do understand the idea of having that value from consistency point of view,
> > I wonder do we really need to allow user to specify values smaller then 128.
> > At least on x86 we always have 128 bit SIMD enabled, even for -Dmachine=default.
> > So seems no much point to forbid libraries using SSE code-path when compiler
> > is free to insert SSE instructions on its own will.
> >
> 
> The reason to support this is for testing purposes, as it allows an easy
> way for a tester to check out any scalar code paths - which are often
> common across architectures.

If it is just for testing things in a consistent way, then it is probably ok.
The thing that worries me is that later in this series there are patches
that insert extra checks into inline functions that use SSE intrinsics:
https://patches.dpdk.org/patch/79355/ (lpm: choose vector path at runtime).
That seems like total overkill to me.

> 
> > > +	RTE_MAX_128_SIMD = 128,
> > > +	RTE_MAX_256_SIMD = 256,
> > > +	RTE_MAX_512_SIMD = 512,
> > > +	RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> >
> > As a nit, I think it is safe enough to have this last value
> > (RTE_MAX_SIMD_DISABLE or RTE_MAX_SIMD_MAX) equal to (INT16_MAX + 1).
> > That would be big enough to probably never hit actual HW limit,
> > while it still remains power of two, as other values.
> >
> 
> I actually think it's probably clearer as-is, because the fact of the rest
> being powers of 2 is irrelevant since we just check greater than or less
> than. 

Well, rte_set_max_simd_bitwidth() does accept only power of two values
_AND_ this special one (UINT16_MAX).
By changing it to 2^15, we can remove that special value test.  

> If we did change it, then we need to put in a comment explaining why
> the plus-one, 

I don't think it is that big a deal to add a comment,
plus for UINT16_MAX we do need some explanation too, right?

>while as it is now it's clearly a placeholder $BIGNUM.
> 
> /Bruce

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 14:07         ` Ananyev, Konstantin
@ 2020-10-08 14:18           ` Bruce Richardson
  2020-10-08 14:26             ` Power, Ciara
  0 siblings, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-10-08 14:18 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: Power, Ciara, dev, Ray Kinsella, Neil Horman

On Thu, Oct 08, 2020 at 03:07:54PM +0100, Ananyev, Konstantin wrote:
> 
> > On Thu, Oct 08, 2020 at 01:07:26PM +0000, Ananyev, Konstantin wrote:
> > >
> > > > This patch adds a max SIMD bitwidth EAL configuration. The API allows
> > > > for an app to set this value. It can also be set using EAL argument
> > > > --force-max-simd-bitwidth, which will lock the value and override any
> > > > modifications made by the app.
> > > >
> > > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > >
> > > > ---
> > > > v3:
> > > >   - Added enum value to essentially disable using max SIMD to choose
> > > >     paths, intended for use by ARM SVE.
> > > >   - Fixed parsing bitwidth argument to return an error for values
> > > >     greater than uint16_t.
> > > > v2: Added to Doxygen comment for API.
> > > > ---
> > > >  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
> > > >  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
> > > >  lib/librte_eal/common/eal_options.h        |  2 +
> > > >  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
> > > >  lib/librte_eal/rte_eal_version.map         |  4 ++
> > > >  5 files changed, 111 insertions(+)
> > > >
> > > > diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> > > > index a5426e1234..e9117a96af 100644
> > > > --- a/lib/librte_eal/common/eal_common_options.c
> > > > +++ b/lib/librte_eal/common/eal_common_options.c
> > > > @@ -102,6 +102,7 @@ eal_long_options[] = {
> > > >  {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
> > > >  {OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
> > > >  {OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> > > > +{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> > > >  {0,                     0, NULL, 0                        }
> > > >  };
> > > >
> > > > @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
> > > >  return 0;
> > > >  }
> > > >
> > > > +static int
> > > > +eal_parse_simd_bitwidth(const char *arg, bool locked)
> > > > +{
> > > > +char *end;
> > > > +unsigned long bitwidth;
> > > > +int ret;
> > > > +struct internal_config *internal_conf =
> > > > +eal_get_internal_configuration();
> > > > +
> > > > +if (arg == NULL || arg[0] == '\0')
> > > > +return -1;
> > > > +
> > > > +errno = 0;
> > > > +bitwidth = strtoul(arg, &end, 0);
> > > > +
> > > > +/* check for errors */
> > > > +if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')
> > > > +return -1;
> > > > +
> > > > +if (bitwidth == 0)
> > > > +bitwidth = UINT16_MAX;
> > > > +ret = rte_set_max_simd_bitwidth(bitwidth);
> > > > +if (ret < 0)
> > > > +return -1;
> > > > +internal_conf->max_simd_bitwidth.locked = locked;
> > > > +return 0;
> > > > +}
> > > > +
> > > >  static int
> > > >  eal_parse_base_virtaddr(const char *arg)
> > > >  {
> > > > @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
> > > >  case OPT_NO_TELEMETRY_NUM:
> > > >  conf->no_telemetry = 1;
> > > >  break;
> > > > +case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> > > > +if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> > > > +RTE_LOG(ERR, EAL, "invalid parameter for --"
> > > > +OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> > > > +return -1;
> > > > +}
> > > > +break;
> > > >
> > > >  /* don't know what to do, leave this to caller */
> > > >  default:
> > > > @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
> > > >  return 0;
> > > >  }
> > > >
> > > > +uint16_t
> > > > +rte_get_max_simd_bitwidth(void)
> > > > +{
> > > > +const struct internal_config *internal_conf =
> > > > +eal_get_internal_configuration();
> > > > +return internal_conf->max_simd_bitwidth.bitwidth;
> > > > +}
> > > > +
> > > > +int
> > > > +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> > > > +{
> > > > +struct internal_config *internal_conf =
> > > > +eal_get_internal_configuration();
> > > > +if (internal_conf->max_simd_bitwidth.locked) {
> > > > +RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> > > > +return -EPERM;
> > > > +}
> > > > +
> > > > +if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> > > > +!rte_is_power_of_2(bitwidth))) {
> > > > +RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> > > > +return -EINVAL;
> > > > +}
> > > > +internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> > > > +return 0;
> > > > +}
> > > > +
> > > >  void
> > > >  eal_common_usage(void)
> > > >  {
> > > > @@ -1981,6 +2044,7 @@ eal_common_usage(void)
> > > >         "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
> > > >         "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
> > > >         "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> > > > +       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
> > > >         "\nEAL options for DEBUG use only:\n"
> > > >         "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
> > > >         "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> > > > diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> > > > index 13f93388a7..367e0cc19e 100644
> > > > --- a/lib/librte_eal/common/eal_internal_cfg.h
> > > > +++ b/lib/librte_eal/common/eal_internal_cfg.h
> > > > @@ -33,6 +33,12 @@ struct hugepage_info {
> > > >  int lock_descriptor;    /**< file descriptor for hugepage dir */
> > > >  };
> > > >
> > > > +struct simd_bitwidth {
> > > > +/**< flag indicating if bitwidth is locked from further modification */
> > > > +bool locked;
> > > > +uint16_t bitwidth; /**< bitwidth value */
> > > > +};
> > > > +
> > > >  /**
> > > >   * internal configuration
> > > >   */
> > > > @@ -85,6 +91,8 @@ struct internal_config {
> > > >  volatile unsigned int init_complete;
> > > >  /**< indicates whether EAL has completed initialization */
> > > >  unsigned int no_telemetry; /**< true to disable Telemetry */
> > > > +/** max simd bitwidth path to use */
> > > > +struct simd_bitwidth max_simd_bitwidth;
> > > >  };
> > > >
> > > >  void eal_reset_internal_config(struct internal_config *internal_cfg);
> > > > diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> > > > index 89769d48b4..ef33979664 100644
> > > > --- a/lib/librte_eal/common/eal_options.h
> > > > +++ b/lib/librte_eal/common/eal_options.h
> > > > @@ -85,6 +85,8 @@ enum {
> > > >  OPT_TELEMETRY_NUM,
> > > >  #define OPT_NO_TELEMETRY      "no-telemetry"
> > > >  OPT_NO_TELEMETRY_NUM,
> > > > +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> > > > +OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> > > >  OPT_LONG_MAX_NUM
> > > >  };
> > > >
> > > > diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> > > > index ddcf6a2e7a..fb739f3474 100644
> > > > --- a/lib/librte_eal/include/rte_eal.h
> > > > +++ b/lib/librte_eal/include/rte_eal.h
> > > > @@ -43,6 +43,14 @@ enum rte_proc_type_t {
> > > >  RTE_PROC_INVALID
> > > >  };
> > > >
> > > > +enum rte_max_simd_t {
> > > > +RTE_NO_SIMD = 64,
> > >
> > > While I do understand the idea of having that value from consistency point of view,
> > > I wonder do we really need to allow user to specify values smaller then 128.
> > > At least on x86 we always have 128 bit SIMD enabled, even for -Dmachine=default.
> > > So seems no much point to forbid libraries using SSE code-path when compiler
> > > is free to insert SSE instructions on its own will.
> > >
> >
> > The reason to support this is for testing purposes, as it allows an easy
> > way for a tester to check out any scalar code paths - which are often
> > common across architectures.
> 
> If it is just for testing things in a consistent way, then it is  probably ok.
> The thing that worries me - later in this series there are patches
> that insert extra checks into inline functions that use SSE instincts:
> https://patches.dpdk.org/patch/79355/ (lpm: choose vector path at runtime).
> Which seems like a total overkill for me.
> 
> >
> > > > +RTE_MAX_128_SIMD = 128,
> > > > +RTE_MAX_256_SIMD = 256,
> > > > +RTE_MAX_512_SIMD = 512,
> > > > +RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> > >
> > > As a nit, I think it is safe enough to have this last value
> > > (RTE_MAX_SIMD_DISABLE or RTE_MAX_SIMD_MAX) equal to (INT16_MAX + 1).
> > > That would be big enough to probably never hit actual HW limit,
> > > while it still remains power of two, as other values.
> > >
> >
> > I actually think it's probably clearer as-is, because the fact of the rest
> > being powers of 2 is irrelevant since we just check greater than or less
> > than.
> 
> Well, rte_set_max_simd_bitwidth() does accept only power of two values
> _AND_ this special one (UINT16_MAX).
> By changing it to 2^15, we can remove that special value test.
> 
> > If we did change it, then we need to put in a comment explaining why
> > the plus-one,
> 
> I don't think it is that big deal to put a comment,
> plus for UINT16_MAX we do need some explanation too, right?
> 
I'm ok either way. Ciara, what do you think?

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-10-08 14:18           ` Bruce Richardson
@ 2020-10-08 14:26             ` Power, Ciara
  0 siblings, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-10-08 14:26 UTC (permalink / raw)
  To: Richardson, Bruce, Ananyev, Konstantin; +Cc: dev, Ray Kinsella, Neil Horman

Hi Bruce, Konstantin,


>-----Original Message-----
>From: Bruce Richardson <bruce.richardson@intel.com>
>Sent: Thursday 8 October 2020 15:18
>To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
>Cc: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org; Ray Kinsella
><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>Subject: Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
>
>On Thu, Oct 08, 2020 at 03:07:54PM +0100, Ananyev, Konstantin wrote:
>>
>> > On Thu, Oct 08, 2020 at 01:07:26PM +0000, Ananyev, Konstantin wrote:
>> > >
>> > > > This patch adds a max SIMD bitwidth EAL configuration. The API
>> > > > allows for an app to set this value. It can also be set using
>> > > > EAL argument --force-max-simd-bitwidth, which will lock the
>> > > > value and override any modifications made by the app.
>> > > >
>> > > > Signed-off-by: Ciara Power <ciara.power@intel.com>
>> > > >
>> > > > ---
>> > > > v3:
>> > > >   - Added enum value to essentially disable using max SIMD to choose
>> > > >     paths, intended for use by ARM SVE.
>> > > >   - Fixed parsing bitwidth argument to return an error for values
>> > > >     greater than uint16_t.
>> > > > v2: Added to Doxygen comment for API.
>> > > > ---
>> > > >  lib/librte_eal/common/eal_common_options.c | 64
>++++++++++++++++++++++
>> > > >  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>> > > >  lib/librte_eal/common/eal_options.h        |  2 +
>> > > >  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
>> > > >  lib/librte_eal/rte_eal_version.map         |  4 ++
>> > > >  5 files changed, 111 insertions(+)
>> > > >
>> > > > diff --git a/lib/librte_eal/common/eal_common_options.c
>> > > > b/lib/librte_eal/common/eal_common_options.c
>> > > > index a5426e1234..e9117a96af 100644
>> > > > --- a/lib/librte_eal/common/eal_common_options.c
>> > > > +++ b/lib/librte_eal/common/eal_common_options.c
>> > > > @@ -102,6 +102,7 @@ eal_long_options[] = {
>> > > > {OPT_MATCH_ALLOCATIONS, 0, NULL,
>OPT_MATCH_ALLOCATIONS_NUM},
>> > > >  {OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>> > > >  {OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>> > > > +{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL,
>> > > > +OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>> > > >  {0,                     0, NULL, 0                        }
>> > > >  };
>> > > >
>> > > > @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
>> > > > return 0;  }
>> > > >
>> > > > +static int
>> > > > +eal_parse_simd_bitwidth(const char *arg, bool locked) { char
>> > > > +*end; unsigned long bitwidth; int ret; struct internal_config
>> > > > +*internal_conf = eal_get_internal_configuration();
>> > > > +
>> > > > +if (arg == NULL || arg[0] == '\0') return -1;
>> > > > +
>> > > > +errno = 0;
>> > > > +bitwidth = strtoul(arg, &end, 0);
>> > > > +
>> > > > +/* check for errors */
>> > > > +if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end
>> > > > +!= '\0') return -1;
>> > > > +
>> > > > +if (bitwidth == 0)
>> > > > +bitwidth = UINT16_MAX;
>> > > > +ret = rte_set_max_simd_bitwidth(bitwidth);
>> > > > +if (ret < 0)
>> > > > +return -1;
>> > > > +internal_conf->max_simd_bitwidth.locked = locked; return 0; }
>> > > > +
>> > > >  static int
>> > > >  eal_parse_base_virtaddr(const char *arg)  { @@ -1707,6 +1736,13
>> > > > @@ eal_parse_common_option(int opt, const char *optarg,  case
>> > > > OPT_NO_TELEMETRY_NUM:
>> > > >  conf->no_telemetry = 1;
>> > > >  break;
>> > > > +case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
>> > > > +if (eal_parse_simd_bitwidth(optarg, 1) < 0) { RTE_LOG(ERR, EAL,
>> > > > +"invalid parameter for --"
>> > > > +OPT_FORCE_MAX_SIMD_BITWIDTH "\n"); return -1; } break;
>> > > >
>> > > >  /* don't know what to do, leave this to caller */
>> > > >  default:
>> > > > @@ -1903,6 +1939,33 @@ eal_check_common_options(struct
>> > > > internal_config *internal_cfg)  return 0;  }
>> > > >
>> > > > +uint16_t
>> > > > +rte_get_max_simd_bitwidth(void) { const struct internal_config
>> > > > +*internal_conf = eal_get_internal_configuration(); return
>> > > > +internal_conf->max_simd_bitwidth.bitwidth;
>> > > > +}
>> > > > +
>> > > > +int
>> > > > +rte_set_max_simd_bitwidth(uint16_t bitwidth) { struct
>> > > > +internal_config *internal_conf =
>> > > > +eal_get_internal_configuration();
>> > > > +if (internal_conf->max_simd_bitwidth.locked) { RTE_LOG(NOTICE,
>> > > > +EAL, "Cannot set max SIMD bitwidth - user runtime override
>> > > > +enabled"); return -EPERM; }
>> > > > +
>> > > > +if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth <
>RTE_NO_SIMD
>> > > > +||
>> > > > +!rte_is_power_of_2(bitwidth))) { RTE_LOG(ERR, EAL, "Invalid
>> > > > +bitwidth value!\n"); return -EINVAL; }
>> > > > +internal_conf->max_simd_bitwidth.bitwidth = bitwidth; return 0;
>> > > > +}
>> > > > +
>> > > >  void
>> > > >  eal_common_usage(void)
>> > > >  {
>> > > > @@ -1981,6 +2044,7 @@ eal_common_usage(void)
>> > > >         "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
>> > > >         "  --"OPT_TELEMETRY"   Enable telemetry support (on by
>default)\n"
>> > > >         "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
>> > > > +       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD
>bitwidth\n"
>> > > >         "\nEAL options for DEBUG use only:\n"
>> > > >         "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
>> > > >         "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
>> > > > diff --git a/lib/librte_eal/common/eal_internal_cfg.h
>b/lib/librte_eal/common/eal_internal_cfg.h
>> > > > index 13f93388a7..367e0cc19e 100644
>> > > > --- a/lib/librte_eal/common/eal_internal_cfg.h
>> > > > +++ b/lib/librte_eal/common/eal_internal_cfg.h
>> > > > @@ -33,6 +33,12 @@ struct hugepage_info {
>> > > >  int lock_descriptor;    /**< file descriptor for hugepage dir */
>> > > >  };
>> > > >
>> > > > +struct simd_bitwidth {
>> > > > +/**< flag indicating if bitwidth is locked from further modification */
>> > > > +bool locked;
>> > > > +uint16_t bitwidth; /**< bitwidth value */
>> > > > +};
>> > > > +
>> > > >  /**
>> > > >   * internal configuration
>> > > >   */
>> > > > @@ -85,6 +91,8 @@ struct internal_config {
>> > > >  volatile unsigned int init_complete;
>> > > >  /**< indicates whether EAL has completed initialization */
>> > > >  unsigned int no_telemetry; /**< true to disable Telemetry */
>> > > > +/** max simd bitwidth path to use */
>> > > > +struct simd_bitwidth max_simd_bitwidth;
>> > > >  };
>> > > >
>> > > >  void eal_reset_internal_config(struct internal_config *internal_cfg);
>> > > > diff --git a/lib/librte_eal/common/eal_options.h
>b/lib/librte_eal/common/eal_options.h
>> > > > index 89769d48b4..ef33979664 100644
>> > > > --- a/lib/librte_eal/common/eal_options.h
>> > > > +++ b/lib/librte_eal/common/eal_options.h
>> > > > @@ -85,6 +85,8 @@ enum {
>> > > >  OPT_TELEMETRY_NUM,
>> > > >  #define OPT_NO_TELEMETRY      "no-telemetry"
>> > > >  OPT_NO_TELEMETRY_NUM,
>> > > > +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-
>bitwidth"
>> > > > +OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
>> > > >  OPT_LONG_MAX_NUM
>> > > >  };
>> > > >
>> > > > diff --git a/lib/librte_eal/include/rte_eal.h
>b/lib/librte_eal/include/rte_eal.h
>> > > > index ddcf6a2e7a..fb739f3474 100644
>> > > > --- a/lib/librte_eal/include/rte_eal.h
>> > > > +++ b/lib/librte_eal/include/rte_eal.h
>> > > > @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>> > > >  RTE_PROC_INVALID
>> > > >  };
>> > > >
>> > > > +enum rte_max_simd_t {
>> > > > +RTE_NO_SIMD = 64,
>> > >
>> > > While I do understand the idea of having that value from consistency
>point of view,
>> > > I wonder do we really need to allow user to specify values smaller then
>128.
>> > > At least on x86 we always have 128 bit SIMD enabled, even for -
>Dmachine=default.
>> > > So seems no much point to forbid libraries using SSE code-path when
>compiler
>> > > is free to insert SSE instructions on its own will.
>> > >
>> >
>> > The reason to support this is for testing purposes, as it allows an easy
>> > way for a tester to check out any scalar code paths - which are often
>> > common across architectures.
>>
>> If it is just for testing things in a consistent way, then it is  probably ok.
>> The thing that worries me - later in this series there are patches
>> that insert extra checks into inline functions that use SSE instincts:
>> https://patches.dpdk.org/patch/79355/ (lpm: choose vector path at
>runtime).
>> Which seems like a total overkill for me.
>>
>> >
>> > > > +RTE_MAX_128_SIMD = 128,
>> > > > +RTE_MAX_256_SIMD = 256,
>> > > > +RTE_MAX_512_SIMD = 512,
>> > > > +RTE_MAX_SIMD_DISABLE = UINT16_MAX,
>> > >
>> > > As a nit, I think it is safe enough to have this last value
>> > > (RTE_MAX_SIMD_DISABLE or RTE_MAX_SIMD_MAX) equal to
>(INT16_MAX + 1).
>> > > That would be big enough to probably never hit actual HW limit,
>> > > while it still remains power of two, as other values.
>> > >
>> >
>> > I actually think it's probably clearer as-is, because the fact of the rest
>> > being powers of 2 is irrelevant since we just check greater than or less
>> > than.
>>
>> Well, rte_set_max_simd_bitwidth() does accept only power of two values
>> _AND_ this special one (UINT16_MAX).
>> By changing it to 2^15, we can remove that special value test.
>>
>> > If we did change it, then we need to put in a comment explaining why
>> > the plus-one,
>>
>> I don't think it is that big deal to put a comment,
>> plus for UINT16_MAX we do need some explanation too, right?
>>
>I'm ok either way. Ciara, what do you think?

Either is fine with me. I can change it to (INT16_MAX + 1) if that is preferred, and remove the extra special-case check in rte_set_max_simd_bitwidth().

Thanks,
Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-09-30 13:54     ` Medvedkin, Vladimir
@ 2020-10-08 14:40       ` Ananyev, Konstantin
  2020-10-09 14:31         ` Power, Ciara
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 14:40 UTC (permalink / raw)
  To: Medvedkin, Vladimir, Power, Ciara, dev
  Cc: Richardson, Bruce, Jerin Jacob, Ruifeng Wang

> 
> Hi Ciara,
> 
> 
> On 30/09/2020 14:04, Ciara Power wrote:
> > When choosing the vector path, max SIMD bitwidth is now checked to
> > ensure a vector path is allowable. To do this, rather than the vector
> > lookup functions being called directly from apps, a generic lookup
> > function is called which will call the vector functions if suitable.
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > ---
> >   lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
> >   lib/librte_lpm/rte_lpm_altivec.h |  2 +-
> >   lib/librte_lpm/rte_lpm_neon.h    |  2 +-
> >   lib/librte_lpm/rte_lpm_sse.h     |  2 +-
> >   4 files changed, 50 insertions(+), 13 deletions(-)
> >
> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> > index 03da2d37e0..edba7cafd5 100644
> > --- a/lib/librte_lpm/rte_lpm.h
> > +++ b/lib/librte_lpm/rte_lpm.h
> > @@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
> >   /* Mask four results. */
> >   #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
> >
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +#include "rte_lpm_neon.h"
> > +#elif defined(RTE_ARCH_PPC_64)
> > +#include "rte_lpm_altivec.h"
> > +#else
> > +#include "rte_lpm_sse.h"
> > +#endif
> > +
> >   /**
> > - * Lookup four IP addresses in an LPM table.
> > + * Lookup four IP addresses in an LPM table individually by calling the
> > + * lookup function for each ip. This is used when lookupx4 is called but
> > + * the vector path is not suitable.
> >    *
> >    * @param lpm
> >    *   LPM object handle
> > @@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
> >    *   if lookup would fail.
> >    */
> >   static inline void
> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > -	uint32_t defv);
> > +rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +	uint32_t defv)
> > +{
> > +	int i;
> > +	for (i = 0; i < 4; i++)
> > +		if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
> > +			hop[i] = defv; /* lookupx4 expected to set on failure */
> > +}
> >
> > -#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > -#include "rte_lpm_neon.h"
> > -#elif defined(RTE_ARCH_PPC_64)
> > -#include "rte_lpm_altivec.h"
> > -#else
> > -#include "rte_lpm_sse.h"
> > -#endif
> > +/**
> > + * Lookup four IP addresses in an LPM table.
> > + *
> > + * @param lpm
> > + *   LPM object handle
> > + * @param ip
> > + *   Four IPs to be looked up in the LPM table
> > + * @param hop
> > + *   Next hop of the most specific rule found for IP (valid on lookup hit only).
> > + *   This is an 4 elements array of two byte values.
> > + *   If the lookup was successful for the given IP, then least significant byte
> > + *   of the corresponding element is the  actual next hop and the most
> > + *   significant byte is zero.
> > + *   If the lookup for the given IP failed, then corresponding element would
> > + *   contain default value, see description of then next parameter.
> > + * @param defv
> > + *   Default value to populate into corresponding element of hop[] array,
> > + *   if lookup would fail.
> > + */
> > +static inline void
> > +rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +	uint32_t defv)
> > +{
> > +	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> > +		rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
> > +	else
> > +		rte_lpm_lookupx4_scalar(lpm, ip, hop, defv);
> > +}
> 
> I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4 is
> used in the hot path, and a bulk size is too small to amortize the cost
> of adding this extra logic.

I do share Vladimir's concern regarding performance here.
As I said in another mail - there seems to be not much point in inserting
these checks into an inline SSE-specific function, as SSE is enabled
by default for all x86 builds.

As another, more generic thought - it might be better to avoid
these checks in other public SIMD-specific inline functions (if any).
If such a function gets called from some .c file, then at least that SIMD
ISA is already enabled for that .c file, and I think this check should be
left for the caller to do.
 
> >
> >   #ifdef __cplusplus
> >   }
> > diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
> > index 228c41b38e..82142d3351 100644
> > --- a/lib/librte_lpm/rte_lpm_altivec.h
> > +++ b/lib/librte_lpm/rte_lpm_altivec.h
> > @@ -16,7 +16,7 @@ extern "C" {
> >   #endif
> >
> >   static inline void
> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> >   	uint32_t defv)
> >   {
> >   	vector signed int i24;
> > diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
> > index 6c131d3125..14b184515d 100644
> > --- a/lib/librte_lpm/rte_lpm_neon.h
> > +++ b/lib/librte_lpm/rte_lpm_neon.h
> > @@ -16,7 +16,7 @@ extern "C" {
> >   #endif
> >
> >   static inline void
> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> >   	uint32_t defv)
> >   {
> >   	uint32x4_t i24;
> > diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
> > index 44770b6ff8..cb5477c6cf 100644
> > --- a/lib/librte_lpm/rte_lpm_sse.h
> > +++ b/lib/librte_lpm/rte_lpm_sse.h
> > @@ -15,7 +15,7 @@ extern "C" {
> >   #endif
> >
> >   static inline void
> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> >   	uint32_t defv)
> >   {
> >   	__m128i i24;
> >
> 
> --
> Regards,
> Vladimir

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-10-06 10:00             ` Olivier Matz
  2020-10-07 11:16               ` Power, Ciara
@ 2020-10-08 14:55               ` Ananyev, Konstantin
  2020-10-13 11:27                 ` Power, Ciara
  1 sibling, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 14:55 UTC (permalink / raw)
  To: Olivier Matz, Power, Ciara
  Cc: Coyle, David, Singh, Jasvinder, dev, O'loingsigh, Mairtin,
	Ryan, Brendan, Richardson, Bruce

> > >> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power When
> > >> > > choosing a vector path to take, an extra condition must be
> > >> > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > >path.
> > >> > >
> > >> > > The vector path was initially chosen in RTE_INIT, however this is
> > >> > > no longer suitable as we cannot check the max SIMD bitwidth at that
> > >time.
> > >> > > The default chosen in RTE_INIT is now scalar. For best performance
> > >> > > and to use vector paths, apps must explicitly call the set
> > >> > > algorithm function before using other functions from this library,
> > >> > > as this is where vector handlers are now chosen.
> > >> >
> > >> > [DC] Has it been decided that it is ok to now require applications
> > >> > to pick the CRC algorithm they want to use?
> > >> >
> > >> > An application which previously automatically got SSE4.2 CRC, for
> > >> > example, will now automatically only get scalar.
> > >> >
> > >> > If this is ok, this should probably be called out explicitly in
> > >> > release notes as it may not be Immediately noticeable to users that
> > >> > they now need to select the CRC algo.
> > >> >
> > >> > Actually, in general, the release notes need to be updated for this
> > >> patchset.
> > >>
> > >> The decision to move rte_set_alg() out of RTE_INIT was taken to avoid
> > >> check on max_simd_bitwidth in data path for every single time when
> > >> crc_calc() api is invoked. Based on my understanding,
> > >> max_simd_bitwidth is set after eal init, and when used in crc_calc(),
> > >> it might override the default crc algo set during RTE_INIT. Therefore,
> > >> to avoid extra check on max_simd_bitwidth in data path,  better option
> > >> will be to use this static configuration one time after eal init in the set_algo
> > >API.
> > >
> > >[DC] Yes that is a good change to have made to avoid extra datapath checks.
> > >
> > >Based on off-list discussion, I now also know the reason behind now
> > >defaulting to scalar CRC in RTE_INIT. If a higher bitwidth CRC was chosen by
> > >RTE_INIT (e.g.
> > >SSE4.2 CRC) but the max_simd_bitwidth was then set to RTE_NO_SIMD (64)
> > >through the EAL parameter or call to rte_set_max_simd_bitwidth(), then
> > >there is a mismatch if rte_net_crc_set_alg() is not then called to reconfigure
> > >the CRC. Defaulting to scalar avoids this mismatch and works on all archs
> > >
> > >As I mentioned before, I think this needs to be called out in release notes, as
> > >it's an under-the-hood change which could cause app performance to drop if
> > >app developers aren't aware of it - the API itself hasn't changed, so they may
> > >not read the doxygen :)
> > >
> >
> > Yes that is a good point, I can add to the release notes for this to call it out.
> 
> I don't think it is a good idea to have the scalar crc by default.
> To me, the fastest available CRC has to be enabled by default.
> 
> I understand the technical reason why you did it like this however: the
> SIMD bitwidth may not be known at the time the
> RTE_INIT(rte_net_crc_init) function is called.
> 
> A simple approach to solve this issue would be to initialize the
> rte_net_crc_handler pointer to a handlers_default. The first time a crc
> is called, the rte_crc32_*_default_handler() function would check the
> configured SIMD bitwidth, and set the handler to the correct one, to
> avoid to do the test for next time.
> 
> This approach still does not solve the case where the SIMD bitwidth is
> modified during the life of the application. In this case, a callback
> would have to be registered to notify SIMD bitwidth changes... but I
> don't think it is worth to do it. Instead, it can be documented that
> rte_set_max_simd_bitwidth() has to be called early, before
> rte_eal_init().

Actually, I also thought about the callback approach.
It does complicate things a bit for sure, but on the positive side -
it allows us to solve the RTE_INIT() code-path selection problem
in a generic way, plus it means zero changes in the data-path.
So it is probably worth considering.

> 
> 
> 
> > >>
> > >>
> > >> > >
> > >> > > Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> > >> > >
> > >> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > >> > >
> > >> > > ---
> > >> > > v3:
> > >> > >   - Moved choosing vector paths out of RTE_INIT.
> > >> > >   - Moved checking max_simd_bitwidth into the set_alg function.
> > >> > > ---
> > >> > >  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
> > >> > > lib/librte_net/rte_net_crc.h |  3 ++-
> > >> > >  2 files changed, 19 insertions(+), 10 deletions(-)
> > >> > >
> > >> > > diff --git a/lib/librte_net/rte_net_crc.c
> > >> > > b/lib/librte_net/rte_net_crc.c index
> > >> > > 9fd4794a9d..241eb16399 100644
> > >> > > --- a/lib/librte_net/rte_net_crc.c
> > >> > > +++ b/lib/librte_net/rte_net_crc.c
> > >> >
> > >> > <snip>
> > >> >
> > >> > > @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data,
> > >> > > uint32_t data_len)  void  rte_net_crc_set_alg(enum rte_net_crc_alg
> > >> > > alg)  {
> > >> > > +	if (max_simd_bitwidth == 0)
> > >> > > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> > >> > > +
> > >> > >  	switch (alg) {
> > >> > >  #ifdef X86_64_SSE42_PCLMULQDQ
> > >> > >  	case RTE_NET_CRC_SSE42:
> > >> > > -		handlers = handlers_sse42;
> > >> > > -		break;
> > >> > > +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> > >> > > +			handlers = handlers_sse42;
> > >> > > +			return;
> > >> > > +		}
> > >> > > +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using
> > >> > > scalar\n");
> > >> >
> > >> > [DC] Not sure if you're aware but there is another patchset which
> > >> > adds an
> > >> > AVX512 CRC implementation and run-time checking of cpuflags to
> > >> > select the CRC path to use:
> > >> > https://patchwork.dpdk.org/project/dpdk/list/?series=12596
> > >> >
> > >> > There will be a task to merge these 2 patchsets if both are merged.
> > >> > It looks fairly straightforward to me to merge these, but it would
> > >> > be good if you take a look too
> >
> > I have looked at that patchset, I agree, I think they will be straightforward to merge together.
> >
> > Thanks,
> > Ciara

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 11/18] net/ixgbe: " Ciara Power
@ 2020-10-08 15:05     ` Ananyev, Konstantin
  2020-10-10 13:13     ` Wang, Haiyue
  1 sibling, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 15:05 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Power, Ciara, Zhao1, Wei, Guo, Jia, Wang, Haiyue



> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: Wei Zhao <wei.zhao1@intel.com>
> Cc: Jeff Guo <jia.guo@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 977ecf5137..eadc7183f2 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>  		dev->tx_pkt_prepare = NULL;
>  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
>  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> -					ixgbe_txq_vec_setup(txq) == 0)) {
> +					ixgbe_txq_vec_setup(txq) == 0) &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {

As a nit - I think it is a bit safer to do all the other checks before calling ixgbe_txq_vec_setup().
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
 

>  			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
>  			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
>  		} else
> @@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>  	 * conditions to be met and Rx Bulk Allocation should be allowed.
>  	 */
>  	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
> -	    !adapter->rx_bulk_alloc_allowed) {
> +	    !adapter->rx_bulk_alloc_allowed ||
> +			rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
>  		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
>  				    "preconditions",
>  			     dev->data->port_id);
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime Ciara Power
  2020-09-30 13:54     ` Medvedkin, Vladimir
@ 2020-10-08 15:19     ` David Marchand
  2020-10-09 12:37       ` David Marchand
  1 sibling, 1 reply; 276+ messages in thread
From: David Marchand @ 2020-10-08 15:19 UTC (permalink / raw)
  To: Ciara Power
  Cc: dev, Bruce Richardson, Vladimir Medvedkin, Jerin Jacob, Ruifeng Wang

On Wed, Sep 30, 2020 at 3:14 PM Ciara Power <ciara.power@intel.com> wrote:
>
> When choosing the vector path, max SIMD bitwidth is now checked to
> ensure a vector path is allowable. To do this, rather than the vector
> lookup functions being called directly from apps, a generic lookup
> function is called which will call the vector functions if suitable.
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>

Got a build error on this patch with ./devtools/test-meson-builds.sh
("gcc-shared" target):

[2/3] Compiling C object
'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o'.
FAILED: examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o
ccache gcc -Iexamples/c590b3c@@dpdk-l3fwd-thread@exe -Iexamples
-I../../dpdk/examples -Iexamples/performance-thread/l3fwd-thread
-I../../dpdk/examples/performance-thread/l3fwd-thread
-I../../dpdk/examples/performance-thread/l3fwd-thread/../common
-I../../dpdk/examples/performance-thread/l3fwd-thread/../common/arch/x86
-I. -I../../dpdk/ -Iconfig -I../../dpdk/config
-Ilib/librte_eal/include -I../../dpdk/lib/librte_eal/include
-Ilib/librte_eal/linux/include
-I../../dpdk/lib/librte_eal/linux/include -Ilib/librte_eal/x86/include
-I../../dpdk/lib/librte_eal/x86/include -Ilib/librte_eal/common
-I../../dpdk/lib/librte_eal/common -Ilib/librte_eal
-I../../dpdk/lib/librte_eal -Ilib/librte_kvargs
-I../../dpdk/lib/librte_kvargs
-Ilib/librte_telemetry/../librte_metrics
-I../../dpdk/lib/librte_telemetry/../librte_metrics
-Ilib/librte_telemetry -I../../dpdk/lib/librte_telemetry
-Ilib/librte_mempool -I../../dpdk/lib/librte_mempool -Ilib/librte_ring
-I../../dpdk/lib/librte_ring -Ilib/librte_net
-I../../dpdk/lib/librte_net -Ilib/librte_mbuf
-I../../dpdk/lib/librte_mbuf -Ilib/librte_ethdev
-I../../dpdk/lib/librte_ethdev -Ilib/librte_meter
-I../../dpdk/lib/librte_meter -Ilib/librte_cmdline
-I../../dpdk/lib/librte_cmdline -Ilib/librte_timer
-I../../dpdk/lib/librte_timer -Ilib/librte_lpm
-I../../dpdk/lib/librte_lpm -Ilib/librte_hash
-I../../dpdk/lib/librte_hash -Ilib/librte_rcu
-I../../dpdk/lib/librte_rcu
-I/home/dmarchan/intel-ipsec-mb/install/include
-fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall
-Winvalid-pch -Werror -O2 -g -include rte_config.h -Wextra -Wcast-qual
-Wdeprecated -Wformat-nonliteral -Wformat-security
-Wmissing-declarations -Wmissing-prototypes -Wnested-externs
-Wold-style-definition -Wpointer-arith -Wsign-compare
-Wstrict-prototypes -Wundef -Wwrite-strings
-Wno-address-of-packed-member -Wno-packed-not-aligned
-Wno-missing-field-initializers -D_GNU_SOURCE -march=native
-Wno-format-truncation -DALLOW_EXPERIMENTAL_API -MD -MQ
'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o'
-MF 'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o.d'
-o 'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o'
-c ../../dpdk/examples/performance-thread/l3fwd-thread/main.c
#‘target_mem_ref’ not supported by expression#’In file included from
../../dpdk/examples/performance-thread/l3fwd-thread/main.c:133:
../../dpdk/examples/performance-thread/l3fwd-thread/main.c: In
function ‘process_burst’:
../../dpdk/lib/librte_lpm/rte_lpm.h:435:7: error:  may be used
uninitialized in this function [-Werror=maybe-uninitialized]
  435 |   if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
      |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
ninja: build stopped: subcommand failed.

gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2)


-- 
David Marchand


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for " Ciara Power
@ 2020-10-08 15:21     ` Ananyev, Konstantin
  2020-10-08 16:05       ` Power, Ciara
  2020-10-09  3:02     ` Guo, Jia
  1 sibling, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 15:21 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Power, Ciara, Xing, Beilei, Guo, Jia


> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: Beilei Xing <beilei.xing@intel.com>
> Cc: Jeff Guo <jia.guo@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index 60b33d20a1..9b535b52fa 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
>  i40e_get_latest_rx_vec(bool scatter)
>  {
>  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
>  				 i40e_recv_pkts_vec_avx2;

Hmm, but that means that if the user sets --force-max-simd-bitwidth=128, we'll select the scalar function, right?
Even though the SSE one is available.
Is that what we really want in that case?

>  #endif
> @@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
>  	 * use of AVX2 version to later plaforms, not all those that could
>  	 * theoretically run it.
>  	 */
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> +			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
>  				 i40e_recv_pkts_vec_avx2;
>  #endif
> @@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>  		}
>  	}
> 
> -	if (ad->rx_vec_allowed) {
> +	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
> +			>= RTE_MAX_128_SIMD) {
>  		/* Vec Rx path */
>  		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
>  				dev->data->port_id);
> @@ -3268,7 +3271,8 @@ static eth_tx_burst_t
>  i40e_get_latest_tx_vec(void)
>  {
>  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
>  		return i40e_xmit_pkts_vec_avx2;
>  #endif
>  	return i40e_xmit_pkts_vec;
> @@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
>  	 * use of AVX2 version to later plaforms, not all those that could
>  	 * theoretically run it.
>  	 */
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> +			rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
>  		return i40e_xmit_pkts_vec_avx2;
>  #endif
>  	return i40e_xmit_pkts_vec;
> @@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>  	}
> 
>  	if (ad->tx_simple_allowed) {
> -		if (ad->tx_vec_allowed) {
> +		if (ad->tx_vec_allowed &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
>  			if (ad->use_latest_vec)
>  				dev->tx_pkt_burst =
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 01/18] eal: add max SIMD bitwidth Ciara Power
                       ` (4 preceding siblings ...)
  2020-10-08 13:19     ` Ananyev, Konstantin
@ 2020-10-08 15:28     ` David Marchand
  5 siblings, 0 replies; 276+ messages in thread
From: David Marchand @ 2020-10-08 15:28 UTC (permalink / raw)
  To: Ciara Power; +Cc: dev, Ray Kinsella, Neil Horman

On Wed, Sep 30, 2020 at 3:08 PM Ciara Power <ciara.power@intel.com> wrote:
>
> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> for an app to set this value. It can also be set using EAL argument
> --force-max-simd-bitwidth, which will lock the value and override any
> modifications made by the app.
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>
>
> ---
> v3:
>   - Added enum value to essentially disable using max SIMD to choose
>     paths, intended for use by ARM SVE.
>   - Fixed parsing bitwidth argument to return an error for values
>     greater than uint16_t.
> v2: Added to Doxygen comment for API.
> ---
>  lib/librte_eal/common/eal_common_options.c | 64 ++++++++++++++++++++++
>  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
>  lib/librte_eal/common/eal_options.h        |  2 +
>  lib/librte_eal/include/rte_eal.h           | 33 +++++++++++
>  lib/librte_eal/rte_eal_version.map         |  4 ++
>  5 files changed, 111 insertions(+)
>
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index a5426e1234..e9117a96af 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -102,6 +102,7 @@ eal_long_options[] = {
>         {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
>         {OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>         {OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> +       {OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>         {0,                     0, NULL, 0                        }
>  };
>
> @@ -1309,6 +1310,34 @@ eal_parse_iova_mode(const char *name)
>         return 0;
>  }
>
> +static int
> +eal_parse_simd_bitwidth(const char *arg, bool locked)

No need to pass a "locked" bool, we only care about forced value in
this function.


> +{
> +       char *end;
> +       unsigned long bitwidth;
> +       int ret;
> +       struct internal_config *internal_conf =
> +               eal_get_internal_configuration();
> +
> +       if (arg == NULL || arg[0] == '\0')
> +               return -1;
> +
> +       errno = 0;
> +       bitwidth = strtoul(arg, &end, 0);
> +
> +       /* check for errors */
> +       if (bitwidth > UINT16_MAX || errno != 0 || end == NULL || *end != '\0')

Nit: check errno and the end pointer first, and only then look at bitwidth.


> +               return -1;
> +
> +       if (bitwidth == 0)
> +               bitwidth = UINT16_MAX;
> +       ret = rte_set_max_simd_bitwidth(bitwidth);
> +       if (ret < 0)
> +               return -1;
> +       internal_conf->max_simd_bitwidth.locked = locked;

Please align eal option and internal config field name.

--force-max-simd-bitwidth => .forced ?
And then %s/locked/forced/g


> +       return 0;
> +}
> +
>  static int
>  eal_parse_base_virtaddr(const char *arg)
>  {
> @@ -1707,6 +1736,13 @@ eal_parse_common_option(int opt, const char *optarg,
>         case OPT_NO_TELEMETRY_NUM:
>                 conf->no_telemetry = 1;
>                 break;
> +       case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> +               if (eal_parse_simd_bitwidth(optarg, 1) < 0) {
> +                       RTE_LOG(ERR, EAL, "invalid parameter for --"
> +                                       OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
> +                       return -1;
> +               }
> +               break;
>
>         /* don't know what to do, leave this to caller */
>         default:
> @@ -1903,6 +1939,33 @@ eal_check_common_options(struct internal_config *internal_cfg)
>         return 0;
>  }
>
> +uint16_t
> +rte_get_max_simd_bitwidth(void)
> +{
> +       const struct internal_config *internal_conf =
> +               eal_get_internal_configuration();
> +       return internal_conf->max_simd_bitwidth.bitwidth;
> +}
> +
> +int
> +rte_set_max_simd_bitwidth(uint16_t bitwidth)
> +{
> +       struct internal_config *internal_conf =
> +               eal_get_internal_configuration();
> +       if (internal_conf->max_simd_bitwidth.locked) {
> +               RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
> +               return -EPERM;
> +       }
> +
> +       if (bitwidth != RTE_MAX_SIMD_DISABLE && (bitwidth < RTE_NO_SIMD ||
> +                       !rte_is_power_of_2(bitwidth))) {
> +               RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
> +               return -EINVAL;
> +       }
> +       internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
> +       return 0;
> +}
> +
>  void
>  eal_common_usage(void)
>  {
> @@ -1981,6 +2044,7 @@ eal_common_usage(void)
>                "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
>                "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
>                "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> +              "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
>                "\nEAL options for DEBUG use only:\n"
>                "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
>                "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 13f93388a7..367e0cc19e 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -33,6 +33,12 @@ struct hugepage_info {
>         int lock_descriptor;    /**< file descriptor for hugepage dir */
>  };
>
> +struct simd_bitwidth {
> +       /**< flag indicating if bitwidth is locked from further modification */
> +       bool locked;
> +       uint16_t bitwidth; /**< bitwidth value */
> +};
> +
>  /**
>   * internal configuration
>   */
> @@ -85,6 +91,8 @@ struct internal_config {
>         volatile unsigned int init_complete;
>         /**< indicates whether EAL has completed initialization */
>         unsigned int no_telemetry; /**< true to disable Telemetry */
> +       /** max simd bitwidth path to use */

/**<


> +       struct simd_bitwidth max_simd_bitwidth;
>  };
>
>  void eal_reset_internal_config(struct internal_config *internal_cfg);
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index 89769d48b4..ef33979664 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -85,6 +85,8 @@ enum {
>         OPT_TELEMETRY_NUM,
>  #define OPT_NO_TELEMETRY      "no-telemetry"
>         OPT_NO_TELEMETRY_NUM,
> +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> +       OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
>         OPT_LONG_MAX_NUM
>  };
>
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index ddcf6a2e7a..fb739f3474 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -43,6 +43,14 @@ enum rte_proc_type_t {
>         RTE_PROC_INVALID
>  };
>
> +enum rte_max_simd_t {
> +       RTE_NO_SIMD = 64,
> +       RTE_MAX_128_SIMD = 128,
> +       RTE_MAX_256_SIMD = 256,
> +       RTE_MAX_512_SIMD = 512,
> +       RTE_MAX_SIMD_DISABLE = UINT16_MAX,
> +};
> +
>  /**
>   * Get the process type in a multi-process setup
>   *
> @@ -51,6 +59,31 @@ enum rte_proc_type_t {
>   */
>  enum rte_proc_type_t rte_eal_process_type(void);
>
> +/**
> + * Get the supported SIMD bitwidth.
> + *
> + * @return
> + *   uint16_t bitwidth.
> + */
> +__rte_experimental
> +uint16_t rte_get_max_simd_bitwidth(void);
> +
> +/**
> + * Set the supported SIMD bitwidth.
> + * This API should only be called once at initialization, before EAL init.
> + *
> + * @param bitwidth
> + *   uint16_t bitwidth.
> + * @return
> + *   0 on success.
> + * @return
> + *   -EINVAL on invalid bitwidth parameter.
> + * @return
> + *   -EPERM if bitwidth is locked.

A single @return with a bullet list is preferred (note to self: fix
rte_eal_cleanup).

@return
 - 0 on success.
 - -EINVAL on invalid bitwidth parameter.
 - -EPERM if bitwidth is forced.



> + */
> +__rte_experimental
> +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> +
>  /**
>   * Request iopl privilege for all RPL.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index c32461c663..17a7195a3d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -397,6 +397,10 @@ EXPERIMENTAL {
>         rte_service_lcore_may_be_active;
>         rte_thread_register;
>         rte_thread_unregister;
> +
> +       # added in 20.11
> +       rte_get_max_simd_bitwidth;
> +       rte_set_max_simd_bitwidth;
>  };
>
>  INTERNAL {
> --
> 2.17.1
>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-10-08 15:21     ` Ananyev, Konstantin
@ 2020-10-08 16:05       ` Power, Ciara
  2020-10-08 16:14         ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Power, Ciara @ 2020-10-08 16:05 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev; +Cc: Xing, Beilei, Guo, Jia, Richardson, Bruce

Hi Konstantin,

 
>-----Original Message-----
>From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
>Sent: Thursday 8 October 2020 16:22
>To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
>Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
><beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
>Subject: RE: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD
>bitwidth
>
>
>>
>> When choosing a vector path to take, an extra condition must be
>> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
>> path.
>>
>> Cc: Beilei Xing <beilei.xing@intel.com>
>> Cc: Jeff Guo <jia.guo@intel.com>
>>
>> Signed-off-by: Ciara Power <ciara.power@intel.com>
>> ---
>>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
>>  1 file changed, 13 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/net/i40e/i40e_rxtx.c
>> b/drivers/net/i40e/i40e_rxtx.c index 60b33d20a1..9b535b52fa 100644
>> --- a/drivers/net/i40e/i40e_rxtx.c
>> +++ b/drivers/net/i40e/i40e_rxtx.c
>> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
>> i40e_get_latest_rx_vec(bool scatter)  {  #if defined(RTE_ARCH_X86) &&
>> defined(CC_AVX2_SUPPORT)
>> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
>> +			rte_get_max_simd_bitwidth() >=
>RTE_MAX_256_SIMD)
>>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
>>  				 i40e_recv_pkts_vec_avx2;
>
>Hmm, but that means - if user will set --simd-bitwidth=128 we'll select
>scalar function, right?
>Even though sse one is available.
>Is that what we really want in that case?
>

If the max SIMD is 128, the second return in this function is used, which I believe is SSE:

	return scatter ? i40e_recv_scattered_pkts_vec :
			 i40e_recv_pkts_vec;

And that function is only called if the max SIMD is >=128, scalar is used otherwise.

Am I missing something else here?

Thanks,
Ciara 

>>  #endif
>> @@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
>>  	 * use of AVX2 version to later plaforms, not all those that could
>>  	 * theoretically run it.
>>  	 */
>> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
>> +			rte_get_max_simd_bitwidth() >=
>RTE_MAX_256_SIMD)
>>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
>>  				 i40e_recv_pkts_vec_avx2;
>>  #endif
>> @@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>  		}
>>  	}
>>
>> -	if (ad->rx_vec_allowed) {
>> +	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
>> +			>= RTE_MAX_128_SIMD) {
>>  		/* Vec Rx path */
>>  		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on
>port=%d.",
>>  				dev->data->port_id);
>> @@ -3268,7 +3271,8 @@ static eth_tx_burst_t
>>  i40e_get_latest_tx_vec(void)
>>  {
>>  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
>> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
>> +			rte_get_max_simd_bitwidth() >=
>RTE_MAX_256_SIMD)
>>  		return i40e_xmit_pkts_vec_avx2;
>>  #endif
>>  	return i40e_xmit_pkts_vec;
>> @@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
>>  	 * use of AVX2 version to later plaforms, not all those that could
>>  	 * theoretically run it.
>>  	 */
>> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
>> +			rte_get_max_simd_bitwidth() >=
>RTE_MAX_256_SIMD)
>>  		return i40e_xmit_pkts_vec_avx2;
>>  #endif
>>  	return i40e_xmit_pkts_vec;
>> @@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>>  	}
>>
>>  	if (ad->tx_simple_allowed) {
>> -		if (ad->tx_vec_allowed) {
>> +		if (ad->tx_vec_allowed &&
>> +				rte_get_max_simd_bitwidth()
>> +				>= RTE_MAX_128_SIMD) {
>>  			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
>>  			if (ad->use_latest_vec)
>>  				dev->tx_pkt_burst =
>> --
>> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-10-08 16:05       ` Power, Ciara
@ 2020-10-08 16:14         ` Ananyev, Konstantin
  0 siblings, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-08 16:14 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Xing, Beilei, Guo, Jia, Richardson, Bruce


> Hi Konstantin,
> 
> 
> >-----Original Message-----
> >From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> >Sent: Thursday 8 October 2020 16:22
> >To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> >Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
> ><beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
> >Subject: RE: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD
> >bitwidth
> >
> >
> >>
> >> When choosing a vector path to take, an extra condition must be
> >> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> >> path.
> >>
> >> Cc: Beilei Xing <beilei.xing@intel.com>
> >> Cc: Jeff Guo <jia.guo@intel.com>
> >>
> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> >> ---
> >>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
> >>  1 file changed, 13 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/net/i40e/i40e_rxtx.c
> >> b/drivers/net/i40e/i40e_rxtx.c index 60b33d20a1..9b535b52fa 100644
> >> --- a/drivers/net/i40e/i40e_rxtx.c
> >> +++ b/drivers/net/i40e/i40e_rxtx.c
> >> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
> >> i40e_get_latest_rx_vec(bool scatter)  {  #if defined(RTE_ARCH_X86) &&
> >> defined(CC_AVX2_SUPPORT)
> >> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> >> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> >> +			rte_get_max_simd_bitwidth() >=
> >RTE_MAX_256_SIMD)
> >>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> >>  				 i40e_recv_pkts_vec_avx2;
> >
> >Hmm, but that means - if user will set --simd-bitwidth=128 we'll select
> >scalar function, right?
> >Even though sse one is available.
> >Is that what we really want in that case?
> >
> 
> If the max SIMD is 128, the second return in this function is used, which I believe is SSE:
> 
> 	return scatter ? i40e_recv_scattered_pkts_vec :
> 			 i40e_recv_pkts_vec;
> 
> And that function is only called if the max SIMD is >=128, scalar is used otherwise.
> 
> Am I missing something else here?

Nope, you are right, that was me not reading code properly 😊
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 
> Thanks,
> Ciara
> 
> >>  #endif
> >> @@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
> >>  	 * use of AVX2 version to later plaforms, not all those that could
> >>  	 * theoretically run it.
> >>  	 */
> >> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> >> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> >> +			rte_get_max_simd_bitwidth() >=
> >RTE_MAX_256_SIMD)
> >>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> >>  				 i40e_recv_pkts_vec_avx2;
> >>  #endif
> >> @@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> >>  		}
> >>  	}
> >>
> >> -	if (ad->rx_vec_allowed) {
> >> +	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
> >> +			>= RTE_MAX_128_SIMD) {
> >>  		/* Vec Rx path */
> >>  		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on
> >port=%d.",
> >>  				dev->data->port_id);
> >> @@ -3268,7 +3271,8 @@ static eth_tx_burst_t
> >>  i40e_get_latest_tx_vec(void)
> >>  {
> >>  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> >> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> >> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> >> +			rte_get_max_simd_bitwidth() >=
> >RTE_MAX_256_SIMD)
> >>  		return i40e_xmit_pkts_vec_avx2;
> >>  #endif
> >>  	return i40e_xmit_pkts_vec;
> >> @@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
> >>  	 * use of AVX2 version to later plaforms, not all those that could
> >>  	 * theoretically run it.
> >>  	 */
> >> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> >> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> >> +			rte_get_max_simd_bitwidth() >=
> >RTE_MAX_256_SIMD)
> >>  		return i40e_xmit_pkts_vec_avx2;
> >>  #endif
> >>  	return i40e_xmit_pkts_vec;
> >> @@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> >>  	}
> >>
> >>  	if (ad->tx_simple_allowed) {
> >> -		if (ad->tx_vec_allowed) {
> >> +		if (ad->tx_vec_allowed &&
> >> +				rte_get_max_simd_bitwidth()
> >> +				>= RTE_MAX_128_SIMD) {
> >>  			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
> >>  			if (ad->use_latest_vec)
> >>  				dev->tx_pkt_burst =
> >> --
> >> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values
  2020-09-30 13:03   ` [dpdk-dev] [PATCH v3 02/18] eal: add default SIMD bitwidth values Ciara Power
  2020-10-05 19:35     ` David Christensen
  2020-10-08 13:17     ` Ananyev, Konstantin
@ 2020-10-08 16:45     ` David Marchand
  2 siblings, 0 replies; 276+ messages in thread
From: David Marchand @ 2020-10-08 16:45 UTC (permalink / raw)
  To: Ciara Power
  Cc: dev, Ruifeng Wang, Jerin Jacob, Honnappa Nagarahalli,
	David Christensen, Jan Viktorin, Bruce Richardson,
	Konstantin Ananyev

On Wed, Sep 30, 2020 at 3:09 PM Ciara Power <ciara.power@intel.com> wrote:
>
> Each arch has a define for the default SIMD bitwidth value, this is used
> on EAL init to set the config max SIMD bitwidth.
>
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>
>
> ---
> v3:
>   - Removed unnecessary define in generic rte_vect.h
>   - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
> v2: Changed default bitwidth for Arm to 128.
> ---
>  lib/librte_eal/arm/include/rte_vect.h      | 2 ++
>  lib/librte_eal/common/eal_common_options.c | 3 +++
>  lib/librte_eal/ppc/include/rte_vect.h      | 2 ++
>  lib/librte_eal/x86/include/rte_vect.h      | 2 ++
>  4 files changed, 9 insertions(+)
>
> diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
> index 01c51712a1..a3508e69d5 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -14,6 +14,8 @@
>  extern "C" {
>  #endif
>
> +#define RTE_DEFAULT_SIMD_BITWIDTH UINT16_MAX
> +
>  typedef int32x4_t xmm_t;
>
>  #define        XMM_SIZE        (sizeof(xmm_t))
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index e9117a96af..d412cae89b 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -35,6 +35,7 @@
>  #ifndef RTE_EXEC_ENV_WINDOWS
>  #include <rte_telemetry.h>
>  #endif
> +#include <rte_vect.h>
>
>  #include "eal_internal_cfg.h"
>  #include "eal_options.h"
> @@ -344,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
>         internal_cfg->user_mbuf_pool_ops_name = NULL;
>         CPU_ZERO(&internal_cfg->ctrl_cpuset);
>         internal_cfg->init_complete = 0;
> +       internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
> +       internal_cfg->max_simd_bitwidth.locked = 0;

Does the previous patch work without this one?
In any case, the two patches look like a single whole to me and, after reading one of
Olivier's questions on the default value, I would squash them together.



>  }
>
>  static int
> diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
> index b0545c878c..70fbd0c423 100644
> --- a/lib/librte_eal/ppc/include/rte_vect.h
> +++ b/lib/librte_eal/ppc/include/rte_vect.h
> @@ -15,6 +15,8 @@
>  extern "C" {
>  #endif
>
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef vector signed int xmm_t;
>
>  #define        XMM_SIZE        (sizeof(xmm_t))
> diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
> index df5a607623..b1df75aca7 100644
> --- a/lib/librte_eal/x86/include/rte_vect.h
> +++ b/lib/librte_eal/x86/include/rte_vect.h
> @@ -35,6 +35,8 @@
>  extern "C" {
>  #endif
>
> +#define RTE_DEFAULT_SIMD_BITWIDTH 256
> +
>  typedef __m128i xmm_t;
>
>  #define        XMM_SIZE        (sizeof(xmm_t))
> --
> 2.17.1
>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 10/18] net/ice: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 10/18] net/ice: " Ciara Power
@ 2020-10-09  0:04     ` Zhang, Qi Z
  2020-10-09  1:05       ` Zhang, Qi Z
  0 siblings, 1 reply; 276+ messages in thread
From: Zhang, Qi Z @ 2020-10-09  0:04 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Yang, Qiming



> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 9:04 PM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Yang, Qiming
> <qiming.yang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH v3 10/18] net/ice: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Qiming Yang <qiming.yang@intel.com>
> Cc: Qi Zhang <qi.z.zhang@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>

Acked-by: Qi Zhang <qi.z.zhang@intel.com>

> ---
>  drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index
> fef6ad4544..5a29af743c 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -2936,7 +2936,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
>  	bool use_avx2 = false;
> 
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> -		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
> +		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  			ad->rx_vec_allowed = true;
>  			for (i = 0; i < dev->data->nb_rx_queues; i++) {
>  				rxq = dev->data->rx_queues[i];
> @@ -2946,8 +2948,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
>  				}
>  			}
> 
> -			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> -			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
> +			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> +			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
> +					rte_get_max_simd_bitwidth()
> +					>= RTE_MAX_256_SIMD)
>  				use_avx2 = true;
> 
>  		} else {
> @@ -3114,7 +3118,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
>  	bool use_avx2 = false;
> 
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> -		if (!ice_tx_vec_dev_check(dev)) {
> +		if (!ice_tx_vec_dev_check(dev) &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  			ad->tx_vec_allowed = true;
>  			for (i = 0; i < dev->data->nb_tx_queues; i++) {
>  				txq = dev->data->tx_queues[i];
> @@ -3124,8 +3130,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
>  				}
>  			}
> 
> -			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> -			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
> +			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> +			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
> +					rte_get_max_simd_bitwidth()
> +					>= RTE_MAX_256_SIMD)
>  				use_avx2 = true;
> 
>  		} else {
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 08/18] net/fm10k: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 08/18] net/fm10k: " Ciara Power
@ 2020-10-09  0:18     ` Zhang, Qi Z
  0 siblings, 0 replies; 276+ messages in thread
From: Zhang, Qi Z @ 2020-10-09  0:18 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Wang, Xiao W



> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 9:04 PM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Wang, Xiao W <xiao.w.wang@intel.com>
> Subject: [PATCH v3 08/18] net/fm10k: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Qi Zhang <qi.z.zhang@intel.com>
> Cc: Xiao Wang <xiao.w.wang@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>

Acked-by: Qi Zhang <qi.z.zhang@intel.com>

> ---
>  drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/fm10k/fm10k_ethdev.c
> b/drivers/net/fm10k/fm10k_ethdev.c
> index 5771d83b55..a8bc1036a3 100644
> --- a/drivers/net/fm10k/fm10k_ethdev.c
> +++ b/drivers/net/fm10k/fm10k_ethdev.c
> @@ -2930,7 +2930,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
>  		/* primary process has set the ftag flag and offloads */
>  		txq = dev->data->tx_queues[0];
> -		if (fm10k_tx_vec_condition_check(txq)) {
> +		if (fm10k_tx_vec_condition_check(txq) ||
> +				rte_get_max_simd_bitwidth()
> +				< RTE_MAX_128_SIMD) {
>  			dev->tx_pkt_burst = fm10k_xmit_pkts;
>  			dev->tx_pkt_prepare = fm10k_prep_pkts;
>  			PMD_INIT_LOG(DEBUG, "Use regular Tx func"); @@ -2949,7
> +2951,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
>  		txq = dev->data->tx_queues[i];
>  		txq->tx_ftag_en = tx_ftag_en;
>  		/* Check if Vector Tx is satisfied */
> -		if (fm10k_tx_vec_condition_check(txq))
> +		if (fm10k_tx_vec_condition_check(txq) ||
> +				rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD)
>  			use_sse = 0;
>  	}
> 
> @@ -2983,7 +2986,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
>  	 * conditions to be met.
>  	 */
>  	if (!fm10k_rx_vec_condition_check(dev) &&
> -			dev_info->rx_vec_allowed && !rx_ftag_en) {
> +			dev_info->rx_vec_allowed && !rx_ftag_en &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  		if (dev->data->scattered_rx)
>  			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
>  		else
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 10/18] net/ice: add checks for max SIMD bitwidth
  2020-10-09  0:04     ` Zhang, Qi Z
@ 2020-10-09  1:05       ` Zhang, Qi Z
  0 siblings, 0 replies; 276+ messages in thread
From: Zhang, Qi Z @ 2020-10-09  1:05 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Yang, Qiming



> -----Original Message-----
> From: Zhang, Qi Z
> Sent: Friday, October 9, 2020 8:05 AM
> To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>
> Subject: RE: [PATCH v3 10/18] net/ice: add checks for max SIMD bitwidth
> 
> 
> 
> > -----Original Message-----
> > From: Power, Ciara <ciara.power@intel.com>
> > Sent: Wednesday, September 30, 2020 9:04 PM
> > To: dev@dpdk.org
> > Cc: Power, Ciara <ciara.power@intel.com>; Yang, Qiming
> > <qiming.yang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> > Subject: [PATCH v3 10/18] net/ice: add checks for max SIMD bitwidth
> >
> > When choosing a vector path to take, an extra condition must be
> > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled path.
> >
> > Cc: Qiming Yang <qiming.yang@intel.com>
> > Cc: Qi Zhang <qi.z.zhang@intel.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> Acked-by: Qi Zhang <qi.z.zhang@intel.com>

I'd like to withdraw my ack after further consideration; see below.

> 
> > ---
> >  drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
> >  1 file changed, 14 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
> > index fef6ad4544..5a29af743c 100644
> > --- a/drivers/net/ice/ice_rxtx.c
> > +++ b/drivers/net/ice/ice_rxtx.c
> > @@ -2936,7 +2936,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
> >  	bool use_avx2 = false;
> >
> >  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > -		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
> > +		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
> > +				rte_get_max_simd_bitwidth()
> > +				>= RTE_MAX_128_SIMD) {
> >  			ad->rx_vec_allowed = true;
> >  			for (i = 0; i < dev->data->nb_rx_queues; i++) {
> >  				rxq = dev->data->rx_queues[i];
> > @@ -2946,8 +2948,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
> >  				}
> >  			}
> >
> > -			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> > -			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
> > +			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> > +			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
> > +					rte_get_max_simd_bitwidth()
> > +					>= RTE_MAX_256_SIMD)

As we have this max SIMD hint, the AVX512 flag check is not necessary,
and I think that for old platforms which do not support AVX512, the default RTE_DEFAULT_SIMD_BITWIDTH should be configured to 128, so that AVX2 will not be overused.

> >  				use_avx2 = true;
> >
> >  		} else {
> > @@ -3114,7 +3118,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
> >  	bool use_avx2 = false;
> >
> >  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > -		if (!ice_tx_vec_dev_check(dev)) {
> > +		if (!ice_tx_vec_dev_check(dev) &&
> > +				rte_get_max_simd_bitwidth()
> > +				>= RTE_MAX_128_SIMD) {
> >  			ad->tx_vec_allowed = true;
> >  			for (i = 0; i < dev->data->nb_tx_queues; i++) {
> >  				txq = dev->data->tx_queues[i];
> > @@ -3124,8 +3130,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
> >  				}
> >  			}
> >
> > -			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> > -			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
> > +			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> > +			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
> > +					rte_get_max_simd_bitwidth()
> > +					>= RTE_MAX_256_SIMD)
> >  				use_avx2 = true;
> >
> >  		} else {
> > --
> > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for " Ciara Power
  2020-10-08 15:21     ` Ananyev, Konstantin
@ 2020-10-09  3:02     ` Guo, Jia
  2020-10-09 14:02       ` Power, Ciara
  1 sibling, 1 reply; 276+ messages in thread
From: Guo, Jia @ 2020-10-09  3:02 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Xing, Beilei

Hi, power

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 9:04 PM
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
> Subject: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Beilei Xing <beilei.xing@intel.com>
> Cc: Jeff Guo <jia.guo@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
> 60b33d20a1..9b535b52fa 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t  i40e_get_latest_rx_vec(bool
> scatter)  {  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +			rte_get_max_simd_bitwidth() >=

Nitpick: I think it would be better to consistently keep continuation lines aligned with the open parenthesis throughout this patch set. What do you think?

> RTE_MAX_256_SIMD)
>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
>  				 i40e_recv_pkts_vec_avx2;
>  #endif
> @@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
>  	 * use of AVX2 version to later plaforms, not all those that could
>  	 * theoretically run it.
>  	 */
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> +			rte_get_max_simd_bitwidth() >=
> RTE_MAX_256_SIMD)
>  		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
>  				 i40e_recv_pkts_vec_avx2;
>  #endif
> @@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>  		}
>  	}
> 
> -	if (ad->rx_vec_allowed) {
> +	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
> +			>= RTE_MAX_128_SIMD) {
>  		/* Vec Rx path */
>  		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on
> port=%d.",
>  				dev->data->port_id);
> @@ -3268,7 +3271,8 @@ static eth_tx_burst_t
>  i40e_get_latest_tx_vec(void)
>  {
>  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +			rte_get_max_simd_bitwidth() >=
> RTE_MAX_256_SIMD)
>  		return i40e_xmit_pkts_vec_avx2;
>  #endif
>  	return i40e_xmit_pkts_vec;
> @@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
>  	 * use of AVX2 version to later plaforms, not all those that could
>  	 * theoretically run it.
>  	 */
> -	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> +			rte_get_max_simd_bitwidth() >=
> RTE_MAX_256_SIMD)
>  		return i40e_xmit_pkts_vec_avx2;
>  #endif
>  	return i40e_xmit_pkts_vec;
> @@ -3311,7 +3316,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>  	}
> 
>  	if (ad->tx_simple_allowed) {
> -		if (ad->tx_vec_allowed) {
> +		if (ad->tx_vec_allowed &&
> +				rte_get_max_simd_bitwidth()
> +				>= RTE_MAX_128_SIMD) {
>  			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
>  			if (ad->use_latest_vec)
>  				dev->tx_pkt_burst =
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-10-08 15:19     ` David Marchand
@ 2020-10-09 12:37       ` David Marchand
  0 siblings, 0 replies; 276+ messages in thread
From: David Marchand @ 2020-10-09 12:37 UTC (permalink / raw)
  To: Ciara Power
  Cc: dev, Bruce Richardson, Vladimir Medvedkin, Jerin Jacob, Ruifeng Wang

On Thu, Oct 8, 2020 at 5:19 PM David Marchand <david.marchand@redhat.com> wrote:
>
> On Wed, Sep 30, 2020 at 3:14 PM Ciara Power <ciara.power@intel.com> wrote:
> >
> > When choosing the vector path, max SIMD bitwidth is now checked to
> > ensure a vector path is allowable. To do this, rather than the vector
> > lookup functions being called directly from apps, a generic lookup
> > function is called which will call the vector functions if suitable.
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
>
> Got a build error on this patch with ./devtools/test-meson-builds.sh
> ("gcc-shared" target):
>
> [2/3] Compiling C object
> 'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o'.
> FAILED: examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o
> ccache gcc -Iexamples/c590b3c@@dpdk-l3fwd-thread@exe -Iexamples
> -I../../dpdk/examples -Iexamples/performance-thread/l3fwd-thread
> -I../../dpdk/examples/performance-thread/l3fwd-thread
> -I../../dpdk/examples/performance-thread/l3fwd-thread/../common
> -I../../dpdk/examples/performance-thread/l3fwd-thread/../common/arch/x86
> -I. -I../../dpdk/ -Iconfig -I../../dpdk/config
> -Ilib/librte_eal/include -I../../dpdk/lib/librte_eal/include
> -Ilib/librte_eal/linux/include
> -I../../dpdk/lib/librte_eal/linux/include -Ilib/librte_eal/x86/include
> -I../../dpdk/lib/librte_eal/x86/include -Ilib/librte_eal/common
> -I../../dpdk/lib/librte_eal/common -Ilib/librte_eal
> -I../../dpdk/lib/librte_eal -Ilib/librte_kvargs
> -I../../dpdk/lib/librte_kvargs
> -Ilib/librte_telemetry/../librte_metrics
> -I../../dpdk/lib/librte_telemetry/../librte_metrics
> -Ilib/librte_telemetry -I../../dpdk/lib/librte_telemetry
> -Ilib/librte_mempool -I../../dpdk/lib/librte_mempool -Ilib/librte_ring
> -I../../dpdk/lib/librte_ring -Ilib/librte_net
> -I../../dpdk/lib/librte_net -Ilib/librte_mbuf
> -I../../dpdk/lib/librte_mbuf -Ilib/librte_ethdev
> -I../../dpdk/lib/librte_ethdev -Ilib/librte_meter
> -I../../dpdk/lib/librte_meter -Ilib/librte_cmdline
> -I../../dpdk/lib/librte_cmdline -Ilib/librte_timer
> -I../../dpdk/lib/librte_timer -Ilib/librte_lpm
> -I../../dpdk/lib/librte_lpm -Ilib/librte_hash
> -I../../dpdk/lib/librte_hash -Ilib/librte_rcu
> -I../../dpdk/lib/librte_rcu
> -I/home/dmarchan/intel-ipsec-mb/install/include
> -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall
> -Winvalid-pch -Werror -O2 -g -include rte_config.h -Wextra -Wcast-qual
> -Wdeprecated -Wformat-nonliteral -Wformat-security
> -Wmissing-declarations -Wmissing-prototypes -Wnested-externs
> -Wold-style-definition -Wpointer-arith -Wsign-compare
> -Wstrict-prototypes -Wundef -Wwrite-strings
> -Wno-address-of-packed-member -Wno-packed-not-aligned
> -Wno-missing-field-initializers -D_GNU_SOURCE -march=native
> -Wno-format-truncation -DALLOW_EXPERIMENTAL_API -MD -MQ
> 'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o'
> -MF 'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o.d'
> -o 'examples/c590b3c@@dpdk-l3fwd-thread@exe/performance-thread_l3fwd-thread_main.c.o'
> -c ../../dpdk/examples/performance-thread/l3fwd-thread/main.c
> #‘target_mem_ref’ not supported by expression#’In file included from
> ../../dpdk/examples/performance-thread/l3fwd-thread/main.c:133:
> ../../dpdk/examples/performance-thread/l3fwd-thread/main.c: In
> function ‘process_burst’:
> ../../dpdk/lib/librte_lpm/rte_lpm.h:435:7: error:  may be used
> uninitialized in this function [-Werror=maybe-uninitialized]
>   435 |   if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
>       |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> cc1: all warnings being treated as errors
> ninja: build stopped: subcommand failed.
>
> gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2)

On the build issue, I guess you can use an rte_xmm_t as a pass-through.

diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index edba7cafd5..43db784a76 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -430,10 +430,14 @@ static inline void
 rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
        uint32_t defv)
 {
-       int i;
-       for (i = 0; i < 4; i++)
-               if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
+       unsigned int i;
+       rte_xmm_t _ip;
+
+       _ip.x = ip;
+       for (i = 0; i < RTE_DIM(_ip.u32); i++) {
+               if (rte_lpm_lookup(lpm, _ip.u32[i], &hop[i]) < 0)
                        hop[i] = defv; /* lookupx4 expected to set on failure */
+       }
 }

 /**


-- 
David Marchand


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-10-09  3:02     ` Guo, Jia
@ 2020-10-09 14:02       ` Power, Ciara
  2020-10-10  2:07         ` Guo, Jia
  0 siblings, 1 reply; 276+ messages in thread
From: Power, Ciara @ 2020-10-09 14:02 UTC (permalink / raw)
  To: Guo, Jia, dev; +Cc: Xing, Beilei

Hi Jeff,

>-----Original Message-----
>From: Guo, Jia <jia.guo@intel.com>
>Sent: Friday 9 October 2020 04:03
>To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
>Cc: Xing, Beilei <beilei.xing@intel.com>
>Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
>
>Hi, power
>
>> -----Original Message-----
>> From: Power, Ciara <ciara.power@intel.com>
>> Sent: Wednesday, September 30, 2020 9:04 PM
>> To: dev@dpdk.org
>> Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
>> <beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
>> Subject: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
>>
>> When choosing a vector path to take, an extra condition must be
>> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled path.
>>
>> Cc: Beilei Xing <beilei.xing@intel.com>
>> Cc: Jeff Guo <jia.guo@intel.com>
>>
>> Signed-off-by: Ciara Power <ciara.power@intel.com>
>> ---
>>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
>>  1 file changed, 13 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/net/i40e/i40e_rxtx.c
>> b/drivers/net/i40e/i40e_rxtx.c index 60b33d20a1..9b535b52fa 100644
>> --- a/drivers/net/i40e/i40e_rxtx.c
>> +++ b/drivers/net/i40e/i40e_rxtx.c
>> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
>> i40e_get_latest_rx_vec(bool
>> scatter)  {  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT) -if
>> (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
>> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
>> +rte_get_max_simd_bitwidth() >=
>
>Nitpick: I think if consistent to keep alignment for open parenthesis in this
>patch set would be better. Do you think so?
>

This file doesn't seem to have any if statements indented as you suggest.
Some do have a double indent for the continued line, as I have done here, though.

<snip>

Thanks,
Ciara


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-10-08 14:40       ` Ananyev, Konstantin
@ 2020-10-09 14:31         ` Power, Ciara
  2020-10-11 22:49           ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Power, Ciara @ 2020-10-09 14:31 UTC (permalink / raw)
  To: Ananyev, Konstantin, Medvedkin, Vladimir, dev
  Cc: Richardson, Bruce, Jerin Jacob, Ruifeng Wang

Hi Konstantin,


>-----Original Message-----
>From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
>Sent: Thursday 8 October 2020 15:40
>To: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>; Power, Ciara
><ciara.power@intel.com>; dev@dpdk.org
>Cc: Richardson, Bruce <bruce.richardson@intel.com>; Jerin Jacob
><jerinj@marvell.com>; Ruifeng Wang <ruifeng.wang@arm.com>
>Subject: RE: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
>
>>
>> Hi Ciara,
>>
>>
>> On 30/09/2020 14:04, Ciara Power wrote:
>> > When choosing the vector path, max SIMD bitwidth is now checked to
>> > ensure a vector path is allowable. To do this, rather than the
>> > vector lookup functions being called directly from apps, a generic
>> > lookup function is called which will call the vector functions if suitable.
>> >
>> > Signed-off-by: Ciara Power <ciara.power@intel.com>
>> > ---
>> >   lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
>> >   lib/librte_lpm/rte_lpm_altivec.h |  2 +-
>> >   lib/librte_lpm/rte_lpm_neon.h    |  2 +-
>> >   lib/librte_lpm/rte_lpm_sse.h     |  2 +-
>> >   4 files changed, 50 insertions(+), 13 deletions(-)
>> >
>> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
>> > index 03da2d37e0..edba7cafd5 100644
>> > --- a/lib/librte_lpm/rte_lpm.h
>> > +++ b/lib/librte_lpm/rte_lpm.h
>> > @@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm
>*lpm, const uint32_t *ips,
>> >   /* Mask four results. */
>> >   #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
>> >
>> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) #include
>> > +"rte_lpm_neon.h"
>> > +#elif defined(RTE_ARCH_PPC_64)
>> > +#include "rte_lpm_altivec.h"
>> > +#else
>> > +#include "rte_lpm_sse.h"
>> > +#endif
>> > +
>> >   /**
>> > - * Lookup four IP addresses in an LPM table.
>> > + * Lookup four IP addresses in an LPM table individually by calling
>> > + the
>> > + * lookup function for each ip. This is used when lookupx4 is
>> > + called but
>> > + * the vector path is not suitable.
>> >    *
>> >    * @param lpm
>> >    *   LPM object handle
>> > @@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct
>rte_lpm *lpm, const uint32_t *ips,
>> >    *   if lookup would fail.
>> >    */
>> >   static inline void
>> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>> > -	uint32_t defv);
>> > +rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t
>hop[4],
>> > +	uint32_t defv)
>> > +{
>> > +	int i;
>> > +	for (i = 0; i < 4; i++)
>> > +		if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
>> > +			hop[i] = defv; /* lookupx4 expected to set on failure
>*/ }
>> >
>> > -#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) -#include
>> > "rte_lpm_neon.h"
>> > -#elif defined(RTE_ARCH_PPC_64)
>> > -#include "rte_lpm_altivec.h"
>> > -#else
>> > -#include "rte_lpm_sse.h"
>> > -#endif
>> > +/**
>> > + * Lookup four IP addresses in an LPM table.
>> > + *
>> > + * @param lpm
>> > + *   LPM object handle
>> > + * @param ip
>> > + *   Four IPs to be looked up in the LPM table
>> > + * @param hop
>> > + *   Next hop of the most specific rule found for IP (valid on lookup hit
>only).
>> > + *   This is an 4 elements array of two byte values.
>> > + *   If the lookup was successful for the given IP, then least significant
>byte
>> > + *   of the corresponding element is the  actual next hop and the most
>> > + *   significant byte is zero.
>> > + *   If the lookup for the given IP failed, then corresponding element
>would
>> > + *   contain default value, see description of then next parameter.
>> > + * @param defv
>> > + *   Default value to populate into corresponding element of hop[] array,
>> > + *   if lookup would fail.
>> > + */
>> > +static inline void
>> > +rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>> > +	uint32_t defv)
>> > +{
>> > +	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
>> > +		rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
>> > +	else
>> > +		rte_lpm_lookupx4_scalar(lpm, ip, hop, defv); }
>>
>> I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4
>> is used in the hot path, and a bulk size is too small to amortize the
>> cost of adding this extra logic.
>
>I do share Vladimir's concern regarding performance here.
>As I said in other mail - it seems not much point to insert these checks into
>inline SSE specific function, as SSE is enabled by default for all x86 builds.
>

The performance impact is quite small, thanks Vladimir for providing these results:

before patches:
	LPM LookupX4: 25.1 cycles (fails = 12.5%)
	LPM LookupX4: 25.2 cycles (fails = 12.5%)
	LPM LookupX4: 25.2 cycles (fails = 12.5%)
 
v3:
	LPM LookupX4: 26.2 cycles (fails = 12.5%)
	LPM LookupX4: 26.2 cycles (fails = 12.5%)
	LPM LookupX4: 26.2 cycles (fails = 12.5%)
 
v4: 
Note: I haven't sent this publicly yet. It modifies v3 slightly to check the bitwidth
in LPM create and set a flag that is used in lookupx4 to choose either the vector or scalar function.
	LPM LookupX4: 25.5 cycles (fails = 12.5%)
	LPM LookupX4: 25.5 cycles (fails = 12.5%)
	LPM LookupX4: 25.5 cycles (fails = 12.5%)


Thanks,
Ciara

>As another more generic thought - might be better to avoid these checks in
>other public SIMD-specific inline functions (if any).
>If such function get called from some .c, then at least such SIMD ISA is
>already enabled for that .c file and I think this check should be
>left for caller to do.
>
>> >
>> >   #ifdef __cplusplus
>> >   }
>> > diff --git a/lib/librte_lpm/rte_lpm_altivec.h
>> > b/lib/librte_lpm/rte_lpm_altivec.h
>> > index 228c41b38e..82142d3351 100644
>> > --- a/lib/librte_lpm/rte_lpm_altivec.h
>> > +++ b/lib/librte_lpm/rte_lpm_altivec.h
>> > @@ -16,7 +16,7 @@ extern "C" {
>> >   #endif
>> >
>> >   static inline void
>> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t
>> > hop[4],
>> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t
>> > +hop[4],
>> >   	uint32_t defv)
>> >   {
>> >   	vector signed int i24;
>> > diff --git a/lib/librte_lpm/rte_lpm_neon.h
>> > b/lib/librte_lpm/rte_lpm_neon.h index 6c131d3125..14b184515d 100644
>> > --- a/lib/librte_lpm/rte_lpm_neon.h
>> > +++ b/lib/librte_lpm/rte_lpm_neon.h
>> > @@ -16,7 +16,7 @@ extern "C" {
>> >   #endif
>> >
>> >   static inline void
>> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t
>> > hop[4],
>> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t
>> > +hop[4],
>> >   	uint32_t defv)
>> >   {
>> >   	uint32x4_t i24;
>> > diff --git a/lib/librte_lpm/rte_lpm_sse.h
>> > b/lib/librte_lpm/rte_lpm_sse.h index 44770b6ff8..cb5477c6cf 100644
>> > --- a/lib/librte_lpm/rte_lpm_sse.h
>> > +++ b/lib/librte_lpm/rte_lpm_sse.h
>> > @@ -15,7 +15,7 @@ extern "C" {
>> >   #endif
>> >
>> >   static inline void
>> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t
>> > hop[4],
>> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t
>> > +hop[4],
>> >   	uint32_t defv)
>> >   {
>> >   	__m128i i24;
>> >
>>
>> --
>> Regards,
>> Vladimir

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-10-09 14:02       ` Power, Ciara
@ 2020-10-10  2:07         ` Guo, Jia
  2020-10-12  9:37           ` Bruce Richardson
  0 siblings, 1 reply; 276+ messages in thread
From: Guo, Jia @ 2020-10-10  2:07 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Xing, Beilei

Hi, power

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Friday, October 9, 2020 10:03 PM
> To: Guo, Jia <jia.guo@intel.com>; dev@dpdk.org
> Cc: Xing, Beilei <beilei.xing@intel.com>
> Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
> 
> Hi Jeff,
> 
> >-----Original Message-----
> >From: Guo, Jia <jia.guo@intel.com>
> >Sent: Friday 9 October 2020 04:03
> >To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> >Cc: Xing, Beilei <beilei.xing@intel.com>
> >Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD
> >bitwidth
> >
> >Hi, power
> >
> >> -----Original Message-----
> >> From: Power, Ciara <ciara.power@intel.com>
> >> Sent: Wednesday, September 30, 2020 9:04 PM
> >> To: dev@dpdk.org
> >> Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
> >> <beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
> >> Subject: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
> >>
> >> When choosing a vector path to take, an extra condition must be
> >> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> >>
> >> Cc: Beilei Xing <beilei.xing@intel.com>
> >> Cc: Jeff Guo <jia.guo@intel.com>
> >>
> >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> >> ---
> >>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
> >>  1 file changed, 13 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/net/i40e/i40e_rxtx.c
> >> b/drivers/net/i40e/i40e_rxtx.c index 60b33d20a1..9b535b52fa 100644
> >> --- a/drivers/net/i40e/i40e_rxtx.c
> >> +++ b/drivers/net/i40e/i40e_rxtx.c
> >> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
> >> i40e_get_latest_rx_vec(bool
> >> scatter)  {  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> >> -if
> >> (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> >> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> >> +rte_get_max_simd_bitwidth() >=
> >
> >Nitpick: I think if consistent to keep alignment for open parenthesis
> >in this patch set would be better. Do you think so?
> >
> 
> This file doesn't seem to have any if statements indented as you suggest,
> Some do have a double indent for the continued line as I have done here
> though.
> 

Sorry, maybe I wasn't clear. What I meant is the "CHECK" messages below, which are reported when using checkpatch.pl to verify the patch's format.

CHECK: Alignment should match open parenthesis
#733: FILE: drivers/net/i40e/i40e_rxtx.c:3102:
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+                       rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)

CHECK: Alignment should match open parenthesis
#743: FILE: drivers/net/i40e/i40e_rxtx.c:3120:
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+                       rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)

CHECK: Alignment should match open parenthesis
#763: FILE: drivers/net/i40e/i40e_rxtx.c:3275:
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+                       rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)

CHECK: Alignment should match open parenthesis
#773: FILE: drivers/net/i40e/i40e_rxtx.c:3291:
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+                       rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)

CHECK: Alignment should match open parenthesis
#783: FILE: drivers/net/i40e/i40e_rxtx.c:3320:
+               if (ad->tx_vec_allowed &&
+                               rte_get_max_simd_bitwidth()

> <snip>
> 
> Thanks,
> Ciara
> 


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-09-30 13:04   ` [dpdk-dev] [PATCH v3 11/18] net/ixgbe: " Ciara Power
  2020-10-08 15:05     ` Ananyev, Konstantin
@ 2020-10-10 13:13     ` Wang, Haiyue
  2020-10-11 22:31       ` Ananyev, Konstantin
  1 sibling, 1 reply; 276+ messages in thread
From: Wang, Haiyue @ 2020-10-10 13:13 UTC (permalink / raw)
  To: Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia, Ananyev, Konstantin

Hi Ciara,

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Wednesday, September 30, 2020 21:04
> To: dev@dpdk.org
> Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: Wei Zhao <wei.zhao1@intel.com>
> Cc: Jeff Guo <jia.guo@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 977ecf5137..eadc7183f2 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>  		dev->tx_pkt_prepare = NULL;
>  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
>  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> -					ixgbe_txq_vec_setup(txq) == 0)) {
> +					ixgbe_txq_vec_setup(txq) == 0) &&
> +				rte_get_max_simd_bitwidth()

As Konstantin mentioned: " I think it is a bit safer to do all checks first before
 doing txq_vec_setup()."

For x86 & arm platforms, the setup always returns 0, since 'sw_ring_v' is in a union with
'sw_ring', which is initialized in 'ixgbe_dev_tx_queue_setup'.

	union {
		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
	};

static inline int
ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
			    const struct ixgbe_txq_ops *txq_ops)
{
	if (txq->sw_ring_v == NULL)
		return -1;

	/* leave the first one for overflow */
	txq->sw_ring_v = txq->sw_ring_v + 1;
	txq->ops = txq_ops;

	return 0;
}

So we need to check the SIMD bitwidth first, to avoid changing the sw_ring* pointer address.


Also, looks like we need to add check on:

int
ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
{
	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
	if (txq->offloads == 0 &&
#ifdef RTE_LIBRTE_SECURITY
			!(txq->using_ipsec) &&
#endif
			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
                                                     <------------------- Add the same check
				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
					txq->sw_ring_v != NULL)) {
			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
		} else {
			return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
		}
	}

> +				>= RTE_MAX_128_SIMD) {
>  			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
>  			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
>  		} else
> @@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>  	 * conditions to be met and Rx Bulk Allocation should be allowed.
>  	 */
>  	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
> -	    !adapter->rx_bulk_alloc_allowed) {
> +	    !adapter->rx_bulk_alloc_allowed ||
> +			rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
>  		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
>  				    "preconditions",
>  			     dev->data->port_id);
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-10 13:13     ` Wang, Haiyue
@ 2020-10-11 22:31       ` Ananyev, Konstantin
  2020-10-12  1:29         ` Wang, Haiyue
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-11 22:31 UTC (permalink / raw)
  To: Wang, Haiyue, Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia



> > From: Power, Ciara <ciara.power@intel.com>
> > Sent: Wednesday, September 30, 2020 21:04
> > To: dev@dpdk.org
> > Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> > <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> > Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> >
> > When choosing a vector path to take, an extra condition must be
> > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > path.
> >
> > Cc: Wei Zhao <wei.zhao1@intel.com>
> > Cc: Jeff Guo <jia.guo@intel.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > ---
> >  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
> >  1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > index 977ecf5137..eadc7183f2 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
> >  		dev->tx_pkt_prepare = NULL;
> >  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> >  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > -					ixgbe_txq_vec_setup(txq) == 0)) {
> > +					ixgbe_txq_vec_setup(txq) == 0) &&
> > +				rte_get_max_simd_bitwidth()
> 
> As Konstantin mentioned: " I think it is a bit safer to do all checks first before
>  doing txq_vec_setup()."
> 
> Fox x86 & arm platforms, the setup is always 0, since 'sw_ring_v' is union with
> 'sw_ring' which is initialize at 'ixgbe_dev_tx_queue_setup'.
> 
> 	union {
> 		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
> 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
> 	};
> 
> static inline int
> ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
> 			    const struct ixgbe_txq_ops *txq_ops)
> {
> 	if (txq->sw_ring_v == NULL)
> 		return -1;
> 
> 	/* leave the first one for overflow */
> 	txq->sw_ring_v = txq->sw_ring_v + 1;
> 	txq->ops = txq_ops;
> 
> 	return 0;
> }
> 
> So we need check the SIMD bitwidth firstly to avoid changing the sw_ring* pointer address.
> 
> 
> Also, looks like we need to add check on:
> 
> int
> ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
> {
> 	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
> 	if (txq->offloads == 0 &&
> #ifdef RTE_LIBRTE_SECURITY
> 			!(txq->using_ipsec) &&
> #endif
> 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
> 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
>                                                      <------------------- Add the same check
> 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> 					txq->sw_ring_v != NULL)) {
> 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);

Could you probably explain a bit more why it is needed?

> 		} else {
> 			return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
> 		}
> 	}
> 
> > +				>= RTE_MAX_128_SIMD) {
> >  			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
> >  			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
> >  		} else
> > @@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
> >  	 * conditions to be met and Rx Bulk Allocation should be allowed.
> >  	 */
> >  	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
> > -	    !adapter->rx_bulk_alloc_allowed) {
> > +	    !adapter->rx_bulk_alloc_allowed ||
> > +			rte_get_max_simd_bitwidth() < RTE_MAX_128_SIMD) {
> >  		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
> >  				    "preconditions",
> >  			     dev->data->port_id);
> > --
> > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
  2020-10-09 14:31         ` Power, Ciara
@ 2020-10-11 22:49           ` Ananyev, Konstantin
  0 siblings, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-11 22:49 UTC (permalink / raw)
  To: Power, Ciara, Medvedkin, Vladimir, dev
  Cc: Richardson, Bruce, Jerin Jacob, Ruifeng Wang

Hi Ciara,

> Hi Konstantin,
> 
> 
> >-----Original Message-----
> >From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> >Sent: Thursday 8 October 2020 15:40
> >To: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>; Power, Ciara
> ><ciara.power@intel.com>; dev@dpdk.org
> >Cc: Richardson, Bruce <bruce.richardson@intel.com>; Jerin Jacob
> ><jerinj@marvell.com>; Ruifeng Wang <ruifeng.wang@arm.com>
> >Subject: RE: [dpdk-dev] [PATCH v3 18/18] lpm: choose vector path at runtime
> >
> >>
> >> Hi Ciara,
> >>
> >>
> >> On 30/09/2020 14:04, Ciara Power wrote:
> >> > When choosing the vector path, max SIMD bitwidth is now checked to
> >> > ensure a vector path is allowable. To do this, rather than the
> >> > vector lookup functions being called directly from apps, a generic
> >> > lookup function is called which will call the vector functions if suitable.
> >> >
> >> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> >> > ---
> >> >   lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
> >> >   lib/librte_lpm/rte_lpm_altivec.h |  2 +-
> >> >   lib/librte_lpm/rte_lpm_neon.h    |  2 +-
> >> >   lib/librte_lpm/rte_lpm_sse.h     |  2 +-
> >> >   4 files changed, 50 insertions(+), 13 deletions(-)
> >> >
> >> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> >> > index 03da2d37e0..edba7cafd5 100644
> >> > --- a/lib/librte_lpm/rte_lpm.h
> >> > +++ b/lib/librte_lpm/rte_lpm.h
> >> > @@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm
> >*lpm, const uint32_t *ips,
> >> >   /* Mask four results. */
> >> >   #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
> >> >
> >> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) #include
> >> > +"rte_lpm_neon.h"
> >> > +#elif defined(RTE_ARCH_PPC_64)
> >> > +#include "rte_lpm_altivec.h"
> >> > +#else
> >> > +#include "rte_lpm_sse.h"
> >> > +#endif
> >> > +
> >> >   /**
> >> > - * Lookup four IP addresses in an LPM table.
> >> > + * Lookup four IP addresses in an LPM table individually by calling
> >> > + the
> >> > + * lookup function for each ip. This is used when lookupx4 is
> >> > + called but
> >> > + * the vector path is not suitable.
> >> >    *
> >> >    * @param lpm
> >> >    *   LPM object handle
> >> > @@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct
> >rte_lpm *lpm, const uint32_t *ips,
> >> >    *   if lookup would fail.
> >> >    */
> >> >   static inline void
> >> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> >> > -	uint32_t defv);
> >> > +rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t
> >hop[4],
> >> > +	uint32_t defv)
> >> > +{
> >> > +	int i;
> >> > +	for (i = 0; i < 4; i++)
> >> > +		if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
> >> > +			hop[i] = defv; /* lookupx4 expected to set on failure
> >*/ }
> >> >
> >> > -#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) -#include
> >> > "rte_lpm_neon.h"
> >> > -#elif defined(RTE_ARCH_PPC_64)
> >> > -#include "rte_lpm_altivec.h"
> >> > -#else
> >> > -#include "rte_lpm_sse.h"
> >> > -#endif
> >> > +/**
> >> > + * Lookup four IP addresses in an LPM table.
> >> > + *
> >> > + * @param lpm
> >> > + *   LPM object handle
> >> > + * @param ip
> >> > + *   Four IPs to be looked up in the LPM table
> >> > + * @param hop
> >> > + *   Next hop of the most specific rule found for IP (valid on lookup hit
> >only).
> >> > + *   This is an 4 elements array of two byte values.
> >> > + *   If the lookup was successful for the given IP, then least significant
> >byte
> >> > + *   of the corresponding element is the  actual next hop and the most
> >> > + *   significant byte is zero.
> >> > + *   If the lookup for the given IP failed, then corresponding element
> >would
> >> > + *   contain default value, see description of then next parameter.
> >> > + * @param defv
> >> > + *   Default value to populate into corresponding element of hop[] array,
> >> > + *   if lookup would fail.
> >> > + */
> >> > +static inline void
> >> > +rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> >> > +	uint32_t defv)
> >> > +{
> >> > +	if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
> >> > +		rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
> >> > +	else
> >> > +		rte_lpm_lookupx4_scalar(lpm, ip, hop, defv); }
> >>
> >> I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4
> >> is used in the hot path, and a bulk size is too small to amortize the
> >> cost of adding this extra logic.
> >
> >I do share Vladimir's concern regarding performance here.
> >As I said in other mail - it seems not much point to insert these checks into
> >inline SSE specific function, as SSE is enabled by default for all x86 builds.
> >
> 
> The performance impact is quite small, thanks Vladimir for providing these results:
> 
> before patches:
> 	LPM LookupX4: 25.1 cycles (fails = 12.5%)
> 	LPM LookupX4: 25.2 cycles (fails = 12.5%)
> 	LPM LookupX4: 25.2 cycles (fails = 12.5%)
> 
> v3:
> 	LPM LookupX4: 26.2 cycles (fails = 12.5%)
> 	LPM LookupX4: 26.2 cycles (fails = 12.5%)
> 	LPM LookupX4: 26.2 cycles (fails = 12.5%) 

Yes, perf difference is surprisingly small...
Wonder what tests did you use for that?
I'd expect that on l3fwd it would be more noticeable,
especially on machines with low-end cpus. 

> v4:
> Note: I haven't sent this publicly yet, modified v3 slightly to check the bitwidth
> in LPM create and set a flag that is used in lookupx4 to choose either vector or scalar function.

Yes, avoiding function call will definitely help here.
Though I am still not convinced we have to make such checks in that function at all
(and other inline functions).
Inline functions will be compiled within user code and their behaviour should be controlled
together with the rest of user code.
Let's say in l3fwd for IA rte_lpm_lookupx4 is called from /l3fwd_lpm_sse.h,
which, as the name implies, is supposed to be built and used with SSE enabled.
If we'd like l3fwd to obey the 'max-simd-width' parameter, then it needs to be done
somewhere at startup, when behaviour is selected, not inside every possible inline function
that uses SSE intrinsics.

> 	LPM LookupX4: 25.5 cycles (fails = 12.5%)
> 	LPM LookupX4: 25.5 cycles (fails = 12.5%)
> 	LPM LookupX4: 25.5 cycles (fails = 12.5%)
> 
> 
> Thanks,
> Ciara
> 
> >As another more generic thought - might be better to avoid these checks in
> >other public SIMD-specific inline functions (if any).
> >If such function get called from some .c, then at least such SIMD ISA is
> >already enabled for that .c file and I think this check should be
> >left for caller to do.
> >
> >> >
> >> >   #ifdef __cplusplus
> >> >   }
> >> > diff --git a/lib/librte_lpm/rte_lpm_altivec.h
> >> > b/lib/librte_lpm/rte_lpm_altivec.h
> >> > index 228c41b38e..82142d3351 100644
> >> > --- a/lib/librte_lpm/rte_lpm_altivec.h
> >> > +++ b/lib/librte_lpm/rte_lpm_altivec.h
> >> > @@ -16,7 +16,7 @@ extern "C" {
> >> >   #endif
> >> >
> >> >   static inline void
> >> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t
> >> > hop[4],
> >> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t
> >> > +hop[4],
> >> >   	uint32_t defv)
> >> >   {
> >> >   	vector signed int i24;
> >> > diff --git a/lib/librte_lpm/rte_lpm_neon.h
> >> > b/lib/librte_lpm/rte_lpm_neon.h index 6c131d3125..14b184515d 100644
> >> > --- a/lib/librte_lpm/rte_lpm_neon.h
> >> > +++ b/lib/librte_lpm/rte_lpm_neon.h
> >> > @@ -16,7 +16,7 @@ extern "C" {
> >> >   #endif
> >> >
> >> >   static inline void
> >> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t
> >> > hop[4],
> >> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t
> >> > +hop[4],
> >> >   	uint32_t defv)
> >> >   {
> >> >   	uint32x4_t i24;
> >> > diff --git a/lib/librte_lpm/rte_lpm_sse.h
> >> > b/lib/librte_lpm/rte_lpm_sse.h index 44770b6ff8..cb5477c6cf 100644
> >> > --- a/lib/librte_lpm/rte_lpm_sse.h
> >> > +++ b/lib/librte_lpm/rte_lpm_sse.h
> >> > @@ -15,7 +15,7 @@ extern "C" {
> >> >   #endif
> >> >
> >> >   static inline void
> >> > -rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t
> >> > hop[4],
> >> > +rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t
> >> > +hop[4],
> >> >   	uint32_t defv)
> >> >   {
> >> >   	__m128i i24;
> >> >
> >>
> >> --
> >> Regards,
> >> Vladimir

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-11 22:31       ` Ananyev, Konstantin
@ 2020-10-12  1:29         ` Wang, Haiyue
  2020-10-12  9:09           ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Wang, Haiyue @ 2020-10-12  1:29 UTC (permalink / raw)
  To: Ananyev, Konstantin, Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia

> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Monday, October 12, 2020 06:31
> To: Wang, Haiyue <haiyue.wang@intel.com>; Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> Cc: Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia <jia.guo@intel.com>
> Subject: RE: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> 
> 
> 
> > > From: Power, Ciara <ciara.power@intel.com>
> > > Sent: Wednesday, September 30, 2020 21:04
> > > To: dev@dpdk.org
> > > Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> > > <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> > > Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> > >
> > > When choosing a vector path to take, an extra condition must be
> > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > > path.
> > >
> > > Cc: Wei Zhao <wei.zhao1@intel.com>
> > > Cc: Jeff Guo <jia.guo@intel.com>
> > >
> > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > ---
> > >  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
> > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > index 977ecf5137..eadc7183f2 100644
> > > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
> > >  		dev->tx_pkt_prepare = NULL;
> > >  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > >  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > -					ixgbe_txq_vec_setup(txq) == 0)) {
> > > +					ixgbe_txq_vec_setup(txq) == 0) &&
> > > +				rte_get_max_simd_bitwidth()
> >
> > As Konstantin mentioned: " I think it is a bit safer to do all checks first before
> >  doing txq_vec_setup()."
> >
> > Fox x86 & arm platforms, the setup is always 0, since 'sw_ring_v' is union with
> > 'sw_ring' which is initialize at 'ixgbe_dev_tx_queue_setup'.
> >
> > 	union {
> > 		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
> > 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
> > 	};
> >
> > static inline int
> > ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
> > 			    const struct ixgbe_txq_ops *txq_ops)
> > {
> > 	if (txq->sw_ring_v == NULL)
> > 		return -1;
> >
> > 	/* leave the first one for overflow */
> > 	txq->sw_ring_v = txq->sw_ring_v + 1;
> > 	txq->ops = txq_ops;
> >
> > 	return 0;
> > }
> >
> > So we need check the SIMD bitwidth firstly to avoid changing the sw_ring* pointer address.
> >
> >
> > Also, looks like we need to add check on:
> >
> > int
> > ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
> > {
> > 	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
> > 	if (txq->offloads == 0 &&
> > #ifdef RTE_LIBRTE_SECURITY
> > 			!(txq->using_ipsec) &&
> > #endif
> > 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
> > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> >                                                      <------------------- Add the same check
> > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > 					txq->sw_ring_v != NULL)) {
> > 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
> 
> Could you probably explain a bit more why it is needed?

To align with the vector selection path:

		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
					ixgbe_txq_vec_setup(txq) == 0))


> 
> > 		} else {
> > 			return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
> > 		}


> > > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-12  1:29         ` Wang, Haiyue
@ 2020-10-12  9:09           ` Ananyev, Konstantin
  2020-10-12 16:04             ` Wang, Haiyue
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-12  9:09 UTC (permalink / raw)
  To: Wang, Haiyue, Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia

> > > > From: Power, Ciara <ciara.power@intel.com>
> > > > Sent: Wednesday, September 30, 2020 21:04
> > > > To: dev@dpdk.org
> > > > Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> > > > <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> > > > Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> > > >
> > > > When choosing a vector path to take, an extra condition must be
> > > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > > > path.
> > > >
> > > > Cc: Wei Zhao <wei.zhao1@intel.com>
> > > > Cc: Jeff Guo <jia.guo@intel.com>
> > > >
> > > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > > ---
> > > >  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
> > > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > index 977ecf5137..eadc7183f2 100644
> > > > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
> > > >  		dev->tx_pkt_prepare = NULL;
> > > >  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > >  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > -					ixgbe_txq_vec_setup(txq) == 0)) {
> > > > +					ixgbe_txq_vec_setup(txq) == 0) &&
> > > > +				rte_get_max_simd_bitwidth()
> > >
> > > As Konstantin mentioned: " I think it is a bit safer to do all checks first before
> > >  doing txq_vec_setup()."
> > >
> > > Fox x86 & arm platforms, the setup is always 0, since 'sw_ring_v' is union with
> > > 'sw_ring' which is initialize at 'ixgbe_dev_tx_queue_setup'.
> > >
> > > 	union {
> > > 		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
> > > 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
> > > 	};
> > >
> > > static inline int
> > > ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
> > > 			    const struct ixgbe_txq_ops *txq_ops)
> > > {
> > > 	if (txq->sw_ring_v == NULL)
> > > 		return -1;
> > >
> > > 	/* leave the first one for overflow */
> > > 	txq->sw_ring_v = txq->sw_ring_v + 1;
> > > 	txq->ops = txq_ops;
> > >
> > > 	return 0;
> > > }
> > >
> > > So we need check the SIMD bitwidth firstly to avoid changing the sw_ring* pointer address.
> > >
> > >
> > > Also, looks like we need to add check on:
> > >
> > > int
> > > ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
> > > {
> > > 	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
> > > 	if (txq->offloads == 0 &&
> > > #ifdef RTE_LIBRTE_SECURITY
> > > 			!(txq->using_ipsec) &&
> > > #endif
> > > 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
> > > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > >                                                      <------------------- Add the same check
> > > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > 					txq->sw_ring_v != NULL)) {
> > > 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
> >
> > Could you probably explain a bit more why it is needed?
> 
> To align with the vector selection path:
> 
> 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> 					ixgbe_txq_vec_setup(txq) == 0))


Ok, so to make sure that TX is running in vector mode?
If so, wasn't txq->sw_ring_v != NULL intended to do that?
BTW, is it a valid check, considering that sw_ring and sw_ring_v
are in a union?

> 
> 
> >
> > > 		} else {
> > > 			return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
> > > 		}
> 
> 
> > > > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-10-10  2:07         ` Guo, Jia
@ 2020-10-12  9:37           ` Bruce Richardson
  2020-10-13  2:15             ` Guo, Jia
  0 siblings, 1 reply; 276+ messages in thread
From: Bruce Richardson @ 2020-10-12  9:37 UTC (permalink / raw)
  To: Guo, Jia; +Cc: Power, Ciara, dev, Xing, Beilei

On Sat, Oct 10, 2020 at 02:07:15AM +0000, Guo, Jia wrote:
> Hi, power
> 
> > -----Original Message-----
> > From: Power, Ciara <ciara.power@intel.com>
> > Sent: Friday, October 9, 2020 10:03 PM
> > To: Guo, Jia <jia.guo@intel.com>; dev@dpdk.org
> > Cc: Xing, Beilei <beilei.xing@intel.com>
> > Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
> > 
> > Hi Jeff,
> > 
> > >-----Original Message-----
> > >From: Guo, Jia <jia.guo@intel.com>
> > >Sent: Friday 9 October 2020 04:03
> > >To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> > >Cc: Xing, Beilei <beilei.xing@intel.com>
> > >Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD
> > >bitwidth
> > >
> > >Hi, power
> > >
> > >> -----Original Message-----
> > >> From: Power, Ciara <ciara.power@intel.com>
> > >> Sent: Wednesday, September 30, 2020 9:04 PM
> > >> To: dev@dpdk.org
> > >> Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
> > >> <beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
> > >> Subject: [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
> > >>
> > >> When choosing a vector path to take, an extra condition must be
> > >> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > path.
> > >>
> > >> Cc: Beilei Xing <beilei.xing@intel.com>
> > >> Cc: Jeff Guo <jia.guo@intel.com>
> > >>
> > >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> > >> ---
> > >>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
> > >>  1 file changed, 13 insertions(+), 6 deletions(-)
> > >>
> > >> diff --git a/drivers/net/i40e/i40e_rxtx.c
> > >> b/drivers/net/i40e/i40e_rxtx.c index 60b33d20a1..9b535b52fa 100644
> > >> --- a/drivers/net/i40e/i40e_rxtx.c
> > >> +++ b/drivers/net/i40e/i40e_rxtx.c
> > >> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
> > >> i40e_get_latest_rx_vec(bool
> > >> scatter)  {  #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
> > >> -if
> > >> (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> > >> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> > >> +rte_get_max_simd_bitwidth() >=
> > >
> > >Nitpick: I think if consistent to keep alignment for open parenthesis
> > >in this patch set would be better. Do you think so?
> > >
> > 
> > This file doesn't seem to have any if statements indented as you suggest,
> > Some do have a double indent for the continued line as I have done here
> > though.
> > 
> 
> Sorry, maybe I didn't say clear, what I said is the "CHECK" as below when use checkpatch.pl to guaranty the patch's format.
> 
> CHECK: Alignment should match open parenthesis
> #733: FILE: drivers/net/i40e/i40e_rxtx.c:3102:
> +       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +                       rte_get_max_simd_bitwidth() >= RTE_MAX_256_SIMD)
> 
Did you run checkpatch using the DPDK "checkpatches.sh" script? In that
script there are a list of things to ignore, one of which is
"PARENTHESIS_ALIGNMENT", so that should not be flagged here. It's also not
flagged in patchwork by the CI system.

/Bruce

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-12  9:09           ` Ananyev, Konstantin
@ 2020-10-12 16:04             ` Wang, Haiyue
  2020-10-12 16:24               ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Wang, Haiyue @ 2020-10-12 16:04 UTC (permalink / raw)
  To: Ananyev, Konstantin, Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia

> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Monday, October 12, 2020 17:09
> To: Wang, Haiyue <haiyue.wang@intel.com>; Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> Cc: Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia <jia.guo@intel.com>
> Subject: RE: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> 
> > > > > From: Power, Ciara <ciara.power@intel.com>
> > > > > Sent: Wednesday, September 30, 2020 21:04
> > > > > To: dev@dpdk.org
> > > > > Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> > > > > <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> > > > > Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> > > > >
> > > > > When choosing a vector path to take, an extra condition must be
> > > > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > > > > path.
> > > > >
> > > > > Cc: Wei Zhao <wei.zhao1@intel.com>
> > > > > Cc: Jeff Guo <jia.guo@intel.com>
> > > > >
> > > > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > > > ---
> > > > >  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
> > > > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > index 977ecf5137..eadc7183f2 100644
> > > > > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue
> *txq)
> > > > >  		dev->tx_pkt_prepare = NULL;
> > > > >  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > > >  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > > -					ixgbe_txq_vec_setup(txq) == 0)) {
> > > > > +					ixgbe_txq_vec_setup(txq) == 0) &&
> > > > > +				rte_get_max_simd_bitwidth()
> > > >
> > > > As Konstantin mentioned: " I think it is a bit safer to do all checks first before
> > > >  doing txq_vec_setup()."
> > > >
> > > > Fox x86 & arm platforms, the setup is always 0, since 'sw_ring_v' is union with
> > > > 'sw_ring' which is initialize at 'ixgbe_dev_tx_queue_setup'.
> > > >
> > > > 	union {
> > > > 		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
> > > > 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
> > > > 	};
> > > >
> > > > static inline int
> > > > ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
> > > > 			    const struct ixgbe_txq_ops *txq_ops)
> > > > {
> > > > 	if (txq->sw_ring_v == NULL)
> > > > 		return -1;
> > > >
> > > > 	/* leave the first one for overflow */
> > > > 	txq->sw_ring_v = txq->sw_ring_v + 1;
> > > > 	txq->ops = txq_ops;
> > > >
> > > > 	return 0;
> > > > }
> > > >
> > > > So we need check the SIMD bitwidth firstly to avoid changing the sw_ring* pointer address.
> > > >
> > > >
> > > > Also, looks like we need to add check on:
> > > >
> > > > int
> > > > ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
> > > > {
> > > > 	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
> > > > 	if (txq->offloads == 0 &&
> > > > #ifdef RTE_LIBRTE_SECURITY
> > > > 			!(txq->using_ipsec) &&
> > > > #endif
> > > > 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
> > > > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > >                                                      <------------------- Add the same check
> > > > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > 					txq->sw_ring_v != NULL)) {
> > > > 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
> > >
> > > Could you probably explain a bit more why it is needed?
> >
> > To align with the vector selection path:
> >
> > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > 					ixgbe_txq_vec_setup(txq) == 0))
> 
> 
> Ok, so to make sure that TX is running in vector mode?

That's right: since there is no variable to save the vector mode selection,
the check condition should be the same.

> If so, then doesn't txq->sw_ring_v != NULL was intended to do so?
> BTW, is it a valid check? Considering that sw_ring and sw_ring_v
> is a union?

Yes, sw_ring_v should always be !NULL ;-)

> 
> >
> >
> > >
> > > > 		} else {
> > > > 			return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
> > > > 		}
> >
> >
> > > > > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-12 16:04             ` Wang, Haiyue
@ 2020-10-12 16:24               ` Ananyev, Konstantin
  2020-10-13  1:12                 ` Wang, Haiyue
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-12 16:24 UTC (permalink / raw)
  To: Wang, Haiyue, Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia


> > -----Original Message-----
> > From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> > Sent: Monday, October 12, 2020 17:09
> > To: Wang, Haiyue <haiyue.wang@intel.com>; Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> > Cc: Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia <jia.guo@intel.com>
> > Subject: RE: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> >
> > > > > > From: Power, Ciara <ciara.power@intel.com>
> > > > > > Sent: Wednesday, September 30, 2020 21:04
> > > > > > To: dev@dpdk.org
> > > > > > Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> > > > > > <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> > > > > > Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> > > > > >
> > > > > > When choosing a vector path to take, an extra condition must be
> > > > > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > > > > > path.
> > > > > >
> > > > > > Cc: Wei Zhao <wei.zhao1@intel.com>
> > > > > > Cc: Jeff Guo <jia.guo@intel.com>
> > > > > >
> > > > > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > > > > ---
> > > > > >  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
> > > > > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > > index 977ecf5137..eadc7183f2 100644
> > > > > > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > > @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue
> > *txq)
> > > > > >  		dev->tx_pkt_prepare = NULL;
> > > > > >  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > > > >  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > > > -					ixgbe_txq_vec_setup(txq) == 0)) {
> > > > > > +					ixgbe_txq_vec_setup(txq) == 0) &&
> > > > > > +				rte_get_max_simd_bitwidth()
> > > > >
> > > > > As Konstantin mentioned: " I think it is a bit safer to do all checks first before
> > > > >  doing txq_vec_setup()."
> > > > >
> > > > > Fox x86 & arm platforms, the setup is always 0, since 'sw_ring_v' is union with
> > > > > 'sw_ring' which is initialize at 'ixgbe_dev_tx_queue_setup'.
> > > > >
> > > > > 	union {
> > > > > 		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
> > > > > 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
> > > > > 	};
> > > > >
> > > > > static inline int
> > > > > ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
> > > > > 			    const struct ixgbe_txq_ops *txq_ops)
> > > > > {
> > > > > 	if (txq->sw_ring_v == NULL)
> > > > > 		return -1;
> > > > >
> > > > > 	/* leave the first one for overflow */
> > > > > 	txq->sw_ring_v = txq->sw_ring_v + 1;
> > > > > 	txq->ops = txq_ops;
> > > > >
> > > > > 	return 0;
> > > > > }
> > > > >
> > > > > So we need check the SIMD bitwidth firstly to avoid changing the sw_ring* pointer address.
> > > > >
> > > > >
> > > > > Also, looks like we need to add check on:
> > > > >
> > > > > int
> > > > > ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
> > > > > {
> > > > > 	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
> > > > > 	if (txq->offloads == 0 &&
> > > > > #ifdef RTE_LIBRTE_SECURITY
> > > > > 			!(txq->using_ipsec) &&
> > > > > #endif
> > > > > 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
> > > > > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > > >                                                      <------------------- Add the same check
> > > > > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > > 					txq->sw_ring_v != NULL)) {
> > > > > 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
> > > >
> > > > Could you probably explain a bit more why it is needed?
> > >
> > > To align with the vector selection path:
> > >
> > > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > 					ixgbe_txq_vec_setup(txq) == 0))
> >
> >
> > Ok, so to make sure that TX is running in vector mode?
> 
> That's right, since no variable to save the vector mode selection,
> then the check condition should be the same.

What I am saying is that here, instead of repeating the conditions, we should
check whether vector mode was already selected or not.
Probably the easiest way to do it is to check which tx function is set up.

> 
> > If so, then doesn't txq->sw_ring_v != NULL was intended to do so?
> > BTW, is it a valid check? Considering that sw_ring and sw_ring_v
> > is a union?
> 
> Yes, sw_ring_v should always be !NULL ;-)
> 
> >
> > >
> > >
> > > >
> > > > > 		} else {
> > > > > 			return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
> > > > > 		}
> > >
> > >
> > > > > > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-12 16:24               ` Ananyev, Konstantin
@ 2020-10-13  1:12                 ` Wang, Haiyue
  0 siblings, 0 replies; 276+ messages in thread
From: Wang, Haiyue @ 2020-10-13  1:12 UTC (permalink / raw)
  To: Ananyev, Konstantin, Power, Ciara, dev; +Cc: Zhao1, Wei, Guo, Jia

> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Tuesday, October 13, 2020 00:25
> To: Wang, Haiyue <haiyue.wang@intel.com>; Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> Cc: Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia <jia.guo@intel.com>
> Subject: RE: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> 
> 
> > > -----Original Message-----
> > > From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> > > Sent: Monday, October 12, 2020 17:09
> > > To: Wang, Haiyue <haiyue.wang@intel.com>; Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> > > Cc: Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia <jia.guo@intel.com>
> > > Subject: RE: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> > >
> > > > > > > From: Power, Ciara <ciara.power@intel.com>
> > > > > > > Sent: Wednesday, September 30, 2020 21:04
> > > > > > > To: dev@dpdk.org
> > > > > > > Cc: Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia
> > > > > > > <jia.guo@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>
> > > > > > > Subject: [PATCH v3 11/18] net/ixgbe: add checks for max SIMD bitwidth
> > > > > > >
> > > > > > > When choosing a vector path to take, an extra condition must be
> > > > > > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > > > > > > path.
> > > > > > >
> > > > > > > Cc: Wei Zhao <wei.zhao1@intel.com>
> > > > > > > Cc: Jeff Guo <jia.guo@intel.com>
> > > > > > >
> > > > > > > Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > > > > > ---
> > > > > > >  drivers/net/ixgbe/ixgbe_rxtx.c | 7 +++++--
> > > > > > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > > > index 977ecf5137..eadc7183f2 100644
> > > > > > > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > > > > > @@ -2503,7 +2503,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue
> > > *txq)
> > > > > > >  		dev->tx_pkt_prepare = NULL;
> > > > > > >  		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > > > > >  				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > > > > -					ixgbe_txq_vec_setup(txq) == 0)) {
> > > > > > > +					ixgbe_txq_vec_setup(txq) == 0) &&
> > > > > > > +				rte_get_max_simd_bitwidth()
> > > > > >
> > > > > > As Konstantin mentioned: " I think it is a bit safer to do all checks first before
> > > > > >  doing txq_vec_setup()."
> > > > > >
> > > > > > Fox x86 & arm platforms, the setup is always 0, since 'sw_ring_v' is union with
> > > > > > 'sw_ring' which is initialize at 'ixgbe_dev_tx_queue_setup'.
> > > > > >
> > > > > > 	union {
> > > > > > 		struct ixgbe_tx_entry *sw_ring; /**< address of SW ring for scalar PMD. */
> > > > > > 		struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
> > > > > > 	};
> > > > > >
> > > > > > static inline int
> > > > > > ixgbe_txq_vec_setup_default(struct ixgbe_tx_queue *txq,
> > > > > > 			    const struct ixgbe_txq_ops *txq_ops)
> > > > > > {
> > > > > > 	if (txq->sw_ring_v == NULL)
> > > > > > 		return -1;
> > > > > >
> > > > > > 	/* leave the first one for overflow */
> > > > > > 	txq->sw_ring_v = txq->sw_ring_v + 1;
> > > > > > 	txq->ops = txq_ops;
> > > > > >
> > > > > > 	return 0;
> > > > > > }
> > > > > >
> > > > > > So we need check the SIMD bitwidth firstly to avoid changing the sw_ring* pointer address.
> > > > > >
> > > > > >
> > > > > > Also, looks like we need to add check on:
> > > > > >
> > > > > > int
> > > > > > ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
> > > > > > {
> > > > > > 	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
> > > > > > 	if (txq->offloads == 0 &&
> > > > > > #ifdef RTE_LIBRTE_SECURITY
> > > > > > 			!(txq->using_ipsec) &&
> > > > > > #endif
> > > > > > 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
> > > > > > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > > > >                                                      <------------------- Add the same check
> > > > > > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > > > 					txq->sw_ring_v != NULL)) {
> > > > > > 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
> > > > >
> > > > > Could you probably explain a bit more why it is needed?
> > > >
> > > > To align with the vector selection path:
> > > >
> > > > 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
> > > > 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
> > > > 					ixgbe_txq_vec_setup(txq) == 0))
> > >
> > >
> > > Ok, so to make sure that TX is running in vector mode?
> >
> > That's right, since no variable to save the vector mode selection,
> > then the check condition should be the same.
> 
> What I am saying, that here instead of conditions we  should check
> was vector mode already selected or not.
> Probably the easiest way to do it - check what tx function is setup.
> 

Misunderstood, yes, this is more intuitive and clean.

> > > > > > > 2.17.1


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD bitwidth
  2020-10-12  9:37           ` Bruce Richardson
@ 2020-10-13  2:15             ` Guo, Jia
  0 siblings, 0 replies; 276+ messages in thread
From: Guo, Jia @ 2020-10-13  2:15 UTC (permalink / raw)
  To: Richardson, Bruce, Power, Ciara; +Cc: dev, Xing, Beilei


> -----Original Message-----
> From: Bruce Richardson <bruce.richardson@intel.com>
> Sent: Monday, October 12, 2020 5:38 PM
> To: Guo, Jia <jia.guo@intel.com>
> Cc: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org; Xing, Beilei
> <beilei.xing@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 04/18] net/i40e: add checks for max SIMD
> bitwidth
> 
> On Sat, Oct 10, 2020 at 02:07:15AM +0000, Guo, Jia wrote:
> > Hi, power
> >
> > > -----Original Message-----
> > > From: Power, Ciara <ciara.power@intel.com>
> > > Sent: Friday, October 9, 2020 10:03 PM
> > > To: Guo, Jia <jia.guo@intel.com>; dev@dpdk.org
> > > Cc: Xing, Beilei <beilei.xing@intel.com>
> > > Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD
> > > bitwidth
> > >
> > > Hi Jeff,
> > >
> > > >-----Original Message-----
> > > >From: Guo, Jia <jia.guo@intel.com>
> > > >Sent: Friday 9 October 2020 04:03
> > > >To: Power, Ciara <ciara.power@intel.com>; dev@dpdk.org
> > > >Cc: Xing, Beilei <beilei.xing@intel.com>
> > > >Subject: RE: [PATCH v3 04/18] net/i40e: add checks for max SIMD
> > > >bitwidth
> > > >
> > > >Hi, power
> > > >
> > > >> -----Original Message-----
> > > >> From: Power, Ciara <ciara.power@intel.com>
> > > >> Sent: Wednesday, September 30, 2020 9:04 PM
> > > >> To: dev@dpdk.org
> > > >> Cc: Power, Ciara <ciara.power@intel.com>; Xing, Beilei
> > > >> <beilei.xing@intel.com>; Guo, Jia <jia.guo@intel.com>
> > > >> Subject: [PATCH v3 04/18] net/i40e: add checks for max SIMD
> > > >> bitwidth
> > > >>
> > > >> When choosing a vector path to take, an extra condition must be
> > > >> satisfied to ensure the max SIMD bitwidth allows for the CPU
> > > >> enabled
> > > path.
> > > >>
> > > >> Cc: Beilei Xing <beilei.xing@intel.com>
> > > >> Cc: Jeff Guo <jia.guo@intel.com>
> > > >>
> > > >> Signed-off-by: Ciara Power <ciara.power@intel.com>
> > > >> ---
> > > >>  drivers/net/i40e/i40e_rxtx.c | 19 +++++++++++++------
> > > >>  1 file changed, 13 insertions(+), 6 deletions(-)
> > > >>
> > > >> diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > >> b/drivers/net/i40e/i40e_rxtx.c index 60b33d20a1..9b535b52fa
> > > >> 100644
> > > >> --- a/drivers/net/i40e/i40e_rxtx.c
> > > >> +++ b/drivers/net/i40e/i40e_rxtx.c
> > > >> @@ -3098,7 +3098,8 @@ static eth_rx_burst_t
> > > >> i40e_get_latest_rx_vec(bool
> > > >> scatter)  {  #if defined(RTE_ARCH_X86) &&
> > > >> defined(CC_AVX2_SUPPORT) -if
> > > >> (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> > > >> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> > > >> +rte_get_max_simd_bitwidth() >=
> > > >
> > > >Nitpick: I think if consistent to keep alignment for open
> > > >parenthesis in this patch set would be better. Do you think so?
> > > >
> > >
> > > This file doesn't seem to have any if statements indented as you
> > > suggest, Some do have a double indent for the continued line as I
> > > have done here though.
> > >
> >
> > Sorry, maybe I didn't say clear, what I said is the "CHECK" as below when
> use checkpatch.pl to guaranty the patch's format.
> >
> > CHECK: Alignment should match open parenthesis
> > #733: FILE: drivers/net/i40e/i40e_rxtx.c:3102:
> > +       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> > +                       rte_get_max_simd_bitwidth() >=
> > + RTE_MAX_256_SIMD)
> >
> Did you run checkpatch using the DPDK "checkpatches.sh" script? In that
> script there are a list of things to ignore, one of which is
> "PARENTHESIS_ALIGNMENT", so that should not be flagged here. It's also
> not flagged in patchwork by the CI system.
> 

OK, it seems that parenthesis alignment has been explicitly ignored, even though I would prefer the format to be more consistent. @Power, you can choose whether or not to keep it if there is a new version coming; both are fine based on the rule.

> /Bruce

^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (15 preceding siblings ...)
  2020-09-30 13:03 ` [dpdk-dev] [PATCH v3 00/18] add max SIMD bitwidth to EAL Ciara Power
@ 2020-10-13 10:38 ` Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 01/17] eal: add max SIMD bitwidth Ciara Power
                     ` (16 more replies)
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                   ` (4 subsequent siblings)
  21 siblings, 17 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power

A number of components in DPDK have optional AVX-512 or other vector
code paths which can be selected at runtime. Rather than having each
component provide its own mechanism to select a code path, this patchset
adds support for a single setting to control what code paths are used.
This can be used to enable some non-default code paths e.g. ones using
AVX-512, but also to limit the code paths to certain vector widths, or
to scalar code only, which is useful for testing.

The max SIMD bitwidth setting can be set by the app itself through use of
the available API, or can be overridden by a commandline argument passed by
the user.

v4:
  - Dropped LPM patch as the lookupx4 function is called from SSE/NEON headers,
    so is already on a vectorised path. Given the performance impact for an
    unnecessary change, it was decided the check is not needed.
  - Renamed enum values for readability.
  - Added patch to add check for node library.
  - Reworked net patch to choose default handlers rather than scalar by default.
  - Updated some Doxygen comments.
  - Fixed some other small comments on v3.
v3:
  - Added patch to add check for LPM lib
  - Modified default max bitwidth for Arm to disable max SIMD bitwidth,
    which will allow for SVE.
  - Added "0" as an acceptable value for command-line flag, which internally
    is used as UINT16_MAX to essentially disable max SIMD bitwidth limits.
  - Made suggested changes to net lib patch.
  - Rebased onto main.
v2:
  - Added some documentation.
  - Modified default max bitwidth for Arm.
  - Moved mlx5 condition check into existing check vec support function.
  - Added max SIMD bitwidth checks to some libraries.

Ciara Power (17):
  eal: add max SIMD bitwidth
  doc: add detail on using max SIMD bitwidth
  net/i40e: add checks for max SIMD bitwidth
  net/axgbe: add checks for max SIMD bitwidth
  net/bnxt: add checks for max SIMD bitwidth
  net/enic: add checks for max SIMD bitwidth
  net/fm10k: add checks for max SIMD bitwidth
  net/iavf: add checks for max SIMD bitwidth
  net/ice: add checks for max SIMD bitwidth
  net/ixgbe: add checks for max SIMD bitwidth
  net/mlx5: add checks for max SIMD bitwidth
  net/virtio: add checks for max SIMD bitwidth
  distributor: add checks for max SIMD bitwidth
  member: add checks for max SIMD bitwidth
  efd: add checks for max SIMD bitwidth
  net: add checks for max SIMD bitwidth
  node: choose vector path at runtime

 doc/guides/howto/avx512.rst                   | 36 +++++++++
 doc/guides/howto/index.rst                    |  1 +
 doc/guides/linux_gsg/eal_args.include.rst     | 16 ++++
 .../prog_guide/env_abstraction_layer.rst      | 32 ++++++++
 drivers/net/axgbe/axgbe_rxtx.c                |  3 +-
 drivers/net/bnxt/bnxt_ethdev.c                |  6 +-
 drivers/net/enic/enic_rxtx_vec_avx2.c         |  3 +-
 drivers/net/fm10k/fm10k_ethdev.c              | 11 ++-
 drivers/net/i40e/i40e_rxtx.c                  | 18 +++--
 drivers/net/iavf/iavf_rxtx.c                  | 16 ++--
 drivers/net/ice/ice_rxtx.c                    | 20 +++--
 drivers/net/ixgbe/ixgbe_rxtx.c                |  5 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c              |  2 +
 drivers/net/virtio/virtio_ethdev.c            |  9 ++-
 lib/librte_distributor/rte_distributor.c      |  3 +-
 lib/librte_eal/arm/include/rte_vect.h         |  2 +
 lib/librte_eal/common/eal_common_options.c    | 66 ++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h      |  8 ++
 lib/librte_eal/common/eal_options.h           |  2 +
 lib/librte_eal/include/rte_eal.h              | 40 ++++++++++
 lib/librte_eal/ppc/include/rte_vect.h         |  2 +
 lib/librte_eal/rte_eal_exports.def            |  2 +
 lib/librte_eal/rte_eal_version.map            |  2 +
 lib/librte_eal/x86/include/rte_vect.h         |  2 +
 lib/librte_efd/rte_efd.c                      |  7 +-
 lib/librte_member/rte_member_ht.c             |  3 +-
 lib/librte_net/rte_net_crc.c                  | 75 ++++++++++++++++---
 lib/librte_net/rte_net_crc.h                  |  8 ++
 lib/librte_node/ip4_lookup.c                  | 13 +++-
 lib/librte_node/ip4_lookup_neon.h             |  2 +-
 lib/librte_node/ip4_lookup_sse.h              |  2 +-
 31 files changed, 371 insertions(+), 46 deletions(-)
 create mode 100644 doc/guides/howto/avx512.rst

-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 01/17] eal: add max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
@ 2020-10-13 10:38   ` Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 02/17] doc: add detail on using " Ciara Power
                     ` (15 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Honnappa Nagarahalli,
	Dmitry Kozlyuk, Narcisa Ana Maria Vasile, Dmitry Malloy,
	Pallavi Kadam, Ray Kinsella, Neil Horman

This patch adds a max SIMD bitwidth EAL configuration. The API allows
for an app to set this value. It can also be set using EAL argument
--force-max-simd-bitwidth, which will lock the value and override any
modifications made by the app.

Each arch has a define for the default SIMD bitwidth value; this is used
on EAL init to set the config max SIMD bitwidth.

Cc: Ruifeng Wang <ruifeng.wang@arm.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Cc: David Christensen <drc@linux.vnet.ibm.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4:
  - Used RTE_SIMD_MAX instead of UINT16_MAX.
  - Renamed enums to better reflect usage.
  - Added functions to windows symbol export file.
  - Modified Doxygen comments.
  - Modified enum name.
  - Changed RTE_SIMD_MAX value to a power of 2.
  - Merged patch 2 into this patch.
  - Enum now used for default value defines.
  - Fixed some small comments on v3.
v3:
  - Added enum value to essentially disable using max SIMD to choose
    paths, intended for use by ARM SVE.
  - Fixed parsing bitwidth argument to return an error for values
    greater than uint16_t.
  - Removed unnecessary define in generic rte_vect.h
  - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
v2:
  - Added to Doxygen comment for API.
  - Changed default bitwidth for Arm to 128.
---
 lib/librte_eal/arm/include/rte_vect.h      |  2 +
 lib/librte_eal/common/eal_common_options.c | 66 ++++++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/include/rte_eal.h           | 40 +++++++++++++
 lib/librte_eal/ppc/include/rte_vect.h      |  2 +
 lib/librte_eal/rte_eal_exports.def         |  2 +
 lib/librte_eal/rte_eal_version.map         |  2 +
 lib/librte_eal/x86/include/rte_vect.h      |  2 +
 9 files changed, 126 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index 01c51712a1..f53c89be97 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -14,6 +14,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_MAX
+
 typedef int32x4_t xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index a5426e1234..8c79f1b2fc 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -35,6 +35,7 @@
 #ifndef RTE_EXEC_ENV_WINDOWS
 #include <rte_telemetry.h>
 #endif
+#include <rte_vect.h>
 
 #include "eal_internal_cfg.h"
 #include "eal_options.h"
@@ -102,6 +103,7 @@ eal_long_options[] = {
 	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
+	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
 	{0,                     0, NULL, 0                        }
 };
 
@@ -343,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
 	CPU_ZERO(&internal_cfg->ctrl_cpuset);
 	internal_cfg->init_complete = 0;
+	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
+	internal_cfg->max_simd_bitwidth.forced = 0;
 }
 
 static int
@@ -1309,6 +1313,34 @@ eal_parse_iova_mode(const char *name)
 	return 0;
 }
 
+static int
+eal_parse_simd_bitwidth(const char *arg)
+{
+	char *end;
+	unsigned long bitwidth;
+	int ret;
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	bitwidth = strtoul(arg, &end, 0);
+
+	/* check for errors */
+	if (errno != 0 || end == NULL || *end != '\0' || bitwidth > RTE_SIMD_MAX)
+		return -1;
+
+	if (bitwidth == 0)
+		bitwidth = (unsigned long) RTE_SIMD_MAX;
+	ret = rte_set_max_simd_bitwidth(bitwidth);
+	if (ret < 0)
+		return -1;
+	internal_conf->max_simd_bitwidth.forced = 1;
+	return 0;
+}
+
 static int
 eal_parse_base_virtaddr(const char *arg)
 {
@@ -1707,6 +1739,13 @@ eal_parse_common_option(int opt, const char *optarg,
 	case OPT_NO_TELEMETRY_NUM:
 		conf->no_telemetry = 1;
 		break;
+	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
+		if (eal_parse_simd_bitwidth(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
+			return -1;
+		}
+		break;
 
 	/* don't know what to do, leave this to caller */
 	default:
@@ -1903,6 +1942,32 @@ eal_check_common_options(struct internal_config *internal_cfg)
 	return 0;
 }
 
+uint16_t
+rte_get_max_simd_bitwidth(void)
+{
+	const struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	return internal_conf->max_simd_bitwidth.bitwidth;
+}
+
+int
+rte_set_max_simd_bitwidth(uint16_t bitwidth)
+{
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	if (internal_conf->max_simd_bitwidth.forced) {
+		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
+		return -EPERM;
+	}
+
+	if (bitwidth < RTE_SIMD_DISABLED || !rte_is_power_of_2(bitwidth)) {
+		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
+		return -EINVAL;
+	}
+	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
+	return 0;
+}
+
 void
 eal_common_usage(void)
 {
@@ -1981,6 +2046,7 @@ eal_common_usage(void)
 	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
 	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
 	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
+	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
 	       "\nEAL options for DEBUG use only:\n"
 	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
 	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 13f93388a7..0c880cbe17 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -33,6 +33,12 @@ struct hugepage_info {
 	int lock_descriptor;    /**< file descriptor for hugepage dir */
 };
 
+struct simd_bitwidth {
+	bool forced;
+	/**< flag indicating if bitwidth is forced and can't be modified */
+	uint16_t bitwidth; /**< bitwidth value */
+};
+
 /**
  * internal configuration
  */
@@ -85,6 +91,8 @@ struct internal_config {
 	volatile unsigned int init_complete;
 	/**< indicates whether EAL has completed initialization */
 	unsigned int no_telemetry; /**< true to disable Telemetry */
+	struct simd_bitwidth max_simd_bitwidth;
+	/**< max simd bitwidth path to use */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 89769d48b4..ef33979664 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -85,6 +85,8 @@ enum {
 	OPT_TELEMETRY_NUM,
 #define OPT_NO_TELEMETRY      "no-telemetry"
 	OPT_NO_TELEMETRY_NUM,
+#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
+	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index e3c2ef185e..706d3cca5a 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -43,6 +43,23 @@ enum rte_proc_type_t {
 	RTE_PROC_INVALID
 };
 
+/**
+ * The max SIMD bitwidth value to limit vector path selection.
+ */
+enum rte_max_simd {
+	RTE_SIMD_DISABLED = 64,
+	/**< Limits path selection to scalar, disables all vector paths. */
+	RTE_SIMD_128 = 128,
+	/**< Limits path selection to SSE/NEON/Altivec or below. */
+	RTE_SIMD_256 = 256, /**< Limits path selection to AVX2 or below. */
+	RTE_SIMD_512 = 512, /**< Limits path selection to AVX512 or below. */
+	RTE_SIMD_MAX = INT16_MAX + 1,
+	/**<
+	 * Disables limiting by max SIMD bitwidth, allows all suitable paths.
+	 * This value is used as it is a large number and a power of 2.
+	 */
+};
+
 /**
  * Get the process type in a multi-process setup
  *
@@ -51,6 +68,29 @@ enum rte_proc_type_t {
  */
 enum rte_proc_type_t rte_eal_process_type(void);
 
+/**
+ * Get the supported SIMD bitwidth.
+ *
+ * @return
+ *   uint16_t bitwidth.
+ */
+__rte_experimental
+uint16_t rte_get_max_simd_bitwidth(void);
+
+/**
+ * Set the supported SIMD bitwidth.
+ * This API should only be called once at initialization, before EAL init.
+ *
+ * @param bitwidth
+ *   uint16_t bitwidth.
+ * @return
+ *   - 0 on success.
+ *   - -EINVAL on invalid bitwidth parameter.
+ *   - -EPERM if bitwidth is forced.
+ */
+__rte_experimental
+int rte_set_max_simd_bitwidth(uint16_t bitwidth);
+
 /**
  * Request iopl privilege for all RPL.
  *
diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
index b0545c878c..a69aabc568 100644
--- a/lib/librte_eal/ppc/include/rte_vect.h
+++ b/lib/librte_eal/ppc/include/rte_vect.h
@@ -15,6 +15,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_256
+
 typedef vector signed int xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/rte_eal_exports.def b/lib/librte_eal/rte_eal_exports.def
index 7b35beb702..81e99b00d9 100644
--- a/lib/librte_eal/rte_eal_exports.def
+++ b/lib/librte_eal/rte_eal_exports.def
@@ -26,6 +26,7 @@ EXPORTS
 	rte_eal_tailq_register
 	rte_eal_using_phys_addrs
 	rte_free
+	rte_get_max_simd_bitwidth
 	rte_get_tsc_hz
 	rte_hexdump
 	rte_intr_rx_ctl
@@ -62,6 +63,7 @@ EXPORTS
 	rte_memzone_reserve_aligned
 	rte_memzone_reserve_bounded
 	rte_memzone_walk
+	rte_set_max_simd_bitwidth
 	rte_socket_id
 	rte_strerror
 	rte_strsplit
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index a93dea9fe6..714be49377 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -400,6 +400,8 @@ EXPERIMENTAL {
 	# added in 20.11
 	__rte_eal_trace_generic_size_t;
 	rte_service_lcore_may_be_active;
+	rte_get_max_simd_bitwidth;
+	rte_set_max_simd_bitwidth;
 };
 
 INTERNAL {
diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
index df5a607623..a00d3d5a62 100644
--- a/lib/librte_eal/x86/include/rte_vect.h
+++ b/lib/librte_eal/x86/include/rte_vect.h
@@ -35,6 +35,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_256
+
 typedef __m128i xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 02/17] doc: add detail on using max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 01/17] eal: add max SIMD bitwidth Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 03/17] net/i40e: add checks for " Ciara Power
                     ` (14 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Anatoly Burakov,
	John McNamara, Marko Kovacevic

This patch adds documentation on the usage of the max SIMD bitwidth EAL
setting, and how to use it to enable AVX-512 at runtime.

Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: John McNamara <john.mcnamara@intel.com>
Cc: Marko Kovacevic <marko.kovacevic@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated docs to reflect renamed enum.
v3:
  - Added enum value for disabling use of max SIMD to doc.
  - Added entry to HowTo index.
---
 doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
 doc/guides/howto/index.rst                    |  1 +
 doc/guides/linux_gsg/eal_args.include.rst     | 16 +++++++++
 .../prog_guide/env_abstraction_layer.rst      | 32 +++++++++++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 doc/guides/howto/avx512.rst

diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
new file mode 100644
index 0000000000..6eb3755775
--- /dev/null
+++ b/doc/guides/howto/avx512.rst
@@ -0,0 +1,36 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2020 Intel Corporation.
+
+
+Using AVX-512 with DPDK
+=======================
+
+AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of the EAL API,
+and by the user with a command-line argument. DPDK has a setting for max SIMD bitwidth,
+which can be modified and will then limit the vector path taken by the code.
+
+
+Using the API in apps
+---------------------
+
+Apps can request that DPDK use AVX-512 at runtime, if it provides improved application performance.
+This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
+which does not allow for AVX-512.
+
+.. code-block:: c
+
+   rte_set_max_simd_bitwidth(RTE_SIMD_512);
+
+This API should only be called once at initialization, before EAL init.
+For more information on the possible enum values to use as a parameter, see :ref:`max_simd_bitwidth`.
+
+
+Using the command-line argument
+---------------------------------------------
+
+The user can select to use AVX-512 at runtime, using the following argument to set the max bitwidth::
+
+   ./app/dpdk-testpmd --force-max-simd-bitwidth=512
+
+This will override any further changes to the max SIMD bitwidth in DPDK,
+which is useful for testing purposes.
diff --git a/doc/guides/howto/index.rst b/doc/guides/howto/index.rst
index 5a97ea508c..c2a2c60ddb 100644
--- a/doc/guides/howto/index.rst
+++ b/doc/guides/howto/index.rst
@@ -20,3 +20,4 @@ HowTo Guides
     telemetry
     debug_troubleshoot
     openwrt
+    avx512
diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 0fe4457968..a0bfbd1a98 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -210,3 +210,19 @@ Other options
 *    ``--no-telemetry``:
 
     Disable telemetry.
+
+*    ``--force-max-simd-bitwidth=<val>``:
+
+    Specify the maximum SIMD bitwidth size to handle. This limits which vector paths,
+    if any, are taken, as any paths taken must use a bitwidth at or below the max bitwidth limit.
+    For example, to allow all SIMD bitwidths up to and including AVX-512::
+
+        --force-max-simd-bitwidth=512
+
+    The following example shows limiting the bitwidth to 64-bits to disable all vector code::
+
+        --force-max-simd-bitwidth=64
+
+    To disable use of max SIMD bitwidth limit::
+
+        --force-max-simd-bitwidth=0
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 936c885081..04bb910386 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -486,6 +486,38 @@ the desired addressing mode when virtual devices that are not directly attached
 To facilitate forcing the IOVA mode to a specific value the EAL command line option ``--iova-mode`` can
 be used to select either physical addressing('pa') or virtual addressing('va').
 
+.. _max_simd_bitwidth:
+
+
+Max SIMD bitwidth
+~~~~~~~~~~~~~~~~~
+
+The EAL provides a single setting to limit the max SIMD bitwidth used by DPDK,
+which is used in determining the vector path, if any, chosen by a component.
+The value can be set at runtime by an application using the 'rte_set_max_simd_bitwidth(uint16_t bitwidth)' function,
+which should only be called once at initialization, before EAL init.
+The value can be overridden by the user using the EAL command-line option '--force-max-simd-bitwidth'.
+
+When choosing a vector path, along with checking the CPU feature support,
+the value of the max SIMD bitwidth must also be checked, and can be retrieved using the 'rte_get_max_simd_bitwidth()' function.
+The value should be compared against the enum values for accepted max SIMD bitwidths:
+
+.. code-block:: c
+
+   enum rte_max_simd {
+       RTE_SIMD_DISABLED = 64,
+       RTE_SIMD_128 = 128,
+       RTE_SIMD_256 = 256,
+       RTE_SIMD_512 = 512,
+       RTE_SIMD_MAX = UINT16_MAX,
+   };
+
+   if (rte_get_max_simd_bitwidth() >= RTE_SIMD_512)
+       /* Take AVX-512 vector path */
+   else if (rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
+       /* Take AVX2 vector path */
+
+
 Memory Segments and Memory Zones (memzone)
 ------------------------------------------
 
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 03/17] net/i40e: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 01/17] eal: add max SIMD bitwidth Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 02/17] doc: add detail on using " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 04/17] net/axgbe: " Ciara Power
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Beilei Xing,
	Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Beilei Xing <beilei.xing@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

---
v4: Updated enum names.
---
 drivers/net/i40e/i40e_rxtx.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 322fc1ed75..a6644b3efa 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3098,7 +3098,8 @@ static eth_rx_burst_t
 i40e_get_latest_rx_vec(bool scatter)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 	}
 
-	if (ad->rx_vec_allowed) {
+	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
+			>= RTE_SIMD_128) {
 		/* Vec Rx path */
 		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
 				dev->data->port_id);
@@ -3268,7 +3271,8 @@ static eth_tx_burst_t
 i40e_get_latest_tx_vec(void)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3311,7 +3316,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 	}
 
 	if (ad->tx_simple_allowed) {
-		if (ad->tx_vec_allowed) {
+		if (ad->tx_vec_allowed &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
 			if (ad->use_latest_vec)
 				dev->tx_pkt_burst =
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 04/17] net/axgbe: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (2 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 03/17] net/i40e: add checks for " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 05/17] net/bnxt: " Ciara Power
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power,
	Somalapuram Amaranath

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Amaranath Somalapuram <asomalap@amd.com>

---
v4: Updated enum name.
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index bc93becaa5..5386bd86f8 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -557,7 +557,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!pdata->tx_queues)
 		pdata->tx_queues = dev->data->tx_queues;
 
-	if (txq->vector_disable)
+	if (txq->vector_disable || rte_get_max_simd_bitwidth()
+			< RTE_SIMD_128)
 		dev->tx_pkt_burst = &axgbe_xmit_pkts;
 	else
 #ifdef RTE_ARCH_X86
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 05/17] net/bnxt: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (3 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 04/17] net/axgbe: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 06/17] net/enic: " Ciara Power
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Ajit Khaparde,
	Somnath Kotur

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/bnxt/bnxt_ethdev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 8b63134c39..07d1a1a6ab 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1169,7 +1169,8 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
 		DEV_RX_OFFLOAD_RSS_HASH |
 		DEV_RX_OFFLOAD_VLAN_FILTER)) &&
-	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp) &&
+		rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		PMD_DRV_LOG(INFO, "Using vector mode receive for port %d\n",
 			    eth_dev->data->port_id);
 		bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
@@ -1202,7 +1203,8 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 	 */
 	if (!eth_dev->data->scattered_rx &&
 	    !(offloads & ~DEV_TX_OFFLOAD_MBUF_FAST_FREE) &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) &&
+	    rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		PMD_DRV_LOG(INFO, "Using vector mode transmit for port %d\n",
 			    eth_dev->data->port_id);
 		return bnxt_xmit_pkts_vec;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 06/17] net/enic: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (4 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 05/17] net/bnxt: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 07/17] net/fm10k: " Ciara Power
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, John Daley,
	Hyong Youb Kim

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: John Daley <johndale@cisco.com>
Cc: Hyong Youb Kim <hyonkim@cisco.com>

Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/enic/enic_rxtx_vec_avx2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
index 676b9f5fdb..75e9172177 100644
--- a/drivers/net/enic/enic_rxtx_vec_avx2.c
+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
@@ -821,7 +821,8 @@ enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
 	fconf = &eth_dev->data->dev_conf.fdir_conf;
 	if (fconf->mode != RTE_FDIR_MODE_NONE)
 		return false;
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256) {
 		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
 		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
 		enic->use_noscatter_vec_rx_handler = 1;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 07/17] net/fm10k: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (5 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 06/17] net/enic: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 08/17] net/iavf: " Ciara Power
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Qi Zhang, Xiao Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qi Zhang <qi.z.zhang@intel.com>
Cc: Xiao Wang <xiao.w.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Qi Zhang <qi.z.zhang@intel.com>

---
v4: Updated enum name.
---
 drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c4a6fdf7f0..78c81bf35b 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2937,7 +2937,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* primary process has set the ftag flag and offloads */
 		txq = dev->data->tx_queues[0];
-		if (fm10k_tx_vec_condition_check(txq)) {
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth()
+				< RTE_SIMD_128) {
 			dev->tx_pkt_burst = fm10k_xmit_pkts;
 			dev->tx_pkt_prepare = fm10k_prep_pkts;
 			PMD_INIT_LOG(DEBUG, "Use regular Tx func");
@@ -2956,7 +2958,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 		txq = dev->data->tx_queues[i];
 		txq->tx_ftag_en = tx_ftag_en;
 		/* Check if Vector Tx is satisfied */
-		if (fm10k_tx_vec_condition_check(txq))
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth() < RTE_SIMD_128)
 			use_sse = 0;
 	}
 
@@ -2990,7 +2993,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met.
 	 */
 	if (!fm10k_rx_vec_condition_check(dev) &&
-			dev_info->rx_vec_allowed && !rx_ftag_en) {
+			dev_info->rx_vec_allowed && !rx_ftag_en &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_SIMD_128) {
 		if (dev->data->scattered_rx)
 			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
 		else
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 08/17] net/iavf: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (6 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 07/17] net/fm10k: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 09/17] net/ice: " Ciara Power
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Jingjing Wu,
	Beilei Xing

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Jingjing Wu <jingjing.wu@intel.com>
Cc: Beilei Xing <beilei.xing@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/iavf/iavf_rxtx.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 1b0efe0433..7c27d5beec 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2104,14 +2104,16 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_rx_vec_dev_check(dev)) {
+	if (!iavf_rx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
 			rxq = dev->data->rx_queues[i];
 			(void)iavf_rxq_vec_setup(rxq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 			use_avx2 = true;
 
 		if (dev->data->scattered_rx) {
@@ -2177,7 +2179,8 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_tx_vec_dev_check(dev)) {
+	if (!iavf_tx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
 			txq = dev->data->tx_queues[i];
 			if (!txq)
@@ -2185,8 +2188,9 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 			iavf_txq_vec_setup(txq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 			use_avx2 = true;
 
 		PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 09/17] net/ice: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (7 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 08/17] net/iavf: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 10/17] net/ixgbe: " Ciara Power
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Qiming Yang,
	Qi Zhang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qiming Yang <qiming.yang@intel.com>
Cc: Qi Zhang <qi.z.zhang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 93a0ac6918..0003ce2afe 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2989,7 +2989,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
+		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_SIMD_128) {
 			ad->rx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
 				rxq = dev->data->rx_queues[i];
@@ -2999,8 +3001,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_SIMD_256)
 				use_avx2 = true;
 
 		} else {
@@ -3167,7 +3171,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_tx_vec_dev_check(dev)) {
+		if (!ice_tx_vec_dev_check(dev) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_SIMD_128) {
 			ad->tx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_tx_queues; i++) {
 				txq = dev->data->tx_queues[i];
@@ -3177,8 +3183,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_SIMD_256)
 				use_avx2 = true;
 
 		} else {
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 10/17] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (8 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 09/17] net/ice: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 11/17] net/mlx5: " Ciara Power
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Wei Zhao, Jeff Guo,
	Haiyue Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Wei Zhao <wei.zhao1@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

---
v4:
  - Updated enum name.
  - Moved placement of condition check.
  - Added condition check to tx cleanup path selection.
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 977ecf5137..d371647c0e 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2404,6 +2404,7 @@ ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
 #endif
 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_128 &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					txq->sw_ring_v != NULL)) {
 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
@@ -2502,6 +2503,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 		PMD_INIT_LOG(DEBUG, "Using simple tx code path");
 		dev->tx_pkt_prepare = NULL;
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_128 &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					ixgbe_txq_vec_setup(txq) == 0)) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
@@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met and Rx Bulk Allocation should be allowed.
 	 */
 	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
-	    !adapter->rx_bulk_alloc_allowed) {
+	    !adapter->rx_bulk_alloc_allowed ||
+			rte_get_max_simd_bitwidth() < RTE_SIMD_128) {
 		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
 				    "preconditions",
 			     dev->data->port_id);
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 11/17] net/mlx5: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (9 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 10/17] net/ixgbe: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 12/17] net/virtio: " Ciara Power
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Matan Azrad,
	Shahaf Shuler, Viacheslav Ovsiienko, Viacheslav Ovsiienko,
	Matan Azrad, Shahaf Shuler

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Matan Azrad <matan@mellanox.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>
Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

---
v4: Updated enum name.
v2: Moved check for max bitwidth into existing check vec
    support function.
---
 drivers/net/mlx5/mlx5_rxtx_vec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 711dcd35fa..49f1b61ff8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -148,6 +148,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	uint32_t i;
 
+	if (rte_get_max_simd_bitwidth() < RTE_SIMD_128)
+		return -ENOTSUP;
 	if (!priv->config.rx_vec_en)
 		return -ENOTSUP;
 	if (mlx5_mprq_enabled(dev))
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 12/17] net/virtio: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (10 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 11/17] net/mlx5: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 13/17] distributor: " Ciara Power
                     ` (4 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Maxime Coquelin,
	Chenbo Xia, Zhihong Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Chenbo Xia <chenbo.xia@intel.com>
Cc: Zhihong Wang <zhihong.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
v3: Moved max SIMD bitwidth check to configure function with other vec
    support checks.
---
 drivers/net/virtio/virtio_ethdev.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 0236c756dc..70955e5cc0 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -2313,7 +2313,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
 		     !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
-		     !vtpci_with_feature(hw, VIRTIO_F_VERSION_1))) {
+		     !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
+		     rte_get_max_simd_bitwidth() < RTE_SIMD_512)) {
 			PMD_DRV_LOG(INFO,
 				"disabled packed ring vectorized path for requirements not met");
 			hw->use_vec_rx = 0;
@@ -2366,6 +2367,12 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 					"disabled split ring vectorized rx for offloading enabled");
 				hw->use_vec_rx = 0;
 			}
+
+			if (rte_get_max_simd_bitwidth() < RTE_SIMD_128) {
+				PMD_DRV_LOG(INFO,
+					"disabled split ring vectorized rx, max SIMD bitwidth too low");
+				hw->use_vec_rx = 0;
+			}
 		}
 	}
 
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 13/17] distributor: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (11 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 12/17] net/virtio: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 14/17] member: " Ciara Power
                     ` (3 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, David Hunt

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: David Hunt <david.hunt@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: David Hunt <david.hunt@intel.com>

---
v4: Updated enum name.
---
 lib/librte_distributor/rte_distributor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index 1c047f065a..05e61dddfc 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -636,7 +636,8 @@ rte_distributor_create(const char *name,
 
 	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
 #if defined(RTE_ARCH_X86)
-	d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
+	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
 #endif
 
 	/*
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 14/17] member: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (12 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 13/17] distributor: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 15/17] efd: " Ciara Power
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Yipeng Wang,
	Sameh Gobriel

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU
enabled path.

Cc: Yipeng Wang <yipeng1.wang@intel.com>
Cc: Sameh Gobriel <sameh.gobriel@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>

---
v4: Updated enum name.
---
 lib/librte_member/rte_member_ht.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
index 3ea293a094..98c8aac248 100644
--- a/lib/librte_member/rte_member_ht.c
+++ b/lib/librte_member/rte_member_ht.c
@@ -113,7 +113,8 @@ rte_member_create_ht(struct rte_member_setsum *ss,
 	}
 #if defined(RTE_ARCH_X86)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
-			RTE_MEMBER_BUCKET_ENTRIES == 16)
+			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
 	else
 #endif
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 15/17] efd: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (13 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 14/17] member: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 16/17] net: " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 17/17] node: choose vector path at runtime Ciara Power
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Byron Marohn,
	Yipeng Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Byron Marohn <byron.marohn@intel.com>
Cc: Yipeng Wang <yipeng1.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>

---
v4: Updated enum name.
---
 lib/librte_efd/rte_efd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index 6a799556d4..e925b73a9c 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -645,7 +645,9 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * For less than 4 bits, scalar function performs better
 	 * than vectorised version
 	 */
-	if (RTE_EFD_VALUE_NUM_BITS > 3 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (RTE_EFD_VALUE_NUM_BITS > 3
+			&& rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)
+			&& rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		table->lookup_fn = EFD_LOOKUP_AVX2;
 	else
 #endif
@@ -655,7 +657,8 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * than vectorised version
 	 */
 	if (RTE_EFD_VALUE_NUM_BITS > 16 &&
-	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
 		table->lookup_fn = EFD_LOOKUP_NEON;
 	else
 #endif
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 16/17] net: add checks for max SIMD bitwidth
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (14 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 15/17] efd: " Ciara Power
@ 2020-10-13 10:38   ` " Ciara Power
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 17/17] node: choose vector path at runtime Ciara Power
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Jasvinder Singh,
	Olivier Matz

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

The vector path was initially chosen in RTE_INIT; however, this is no
longer suitable, as the max SIMD bitwidth cannot be checked at that time.
Default handlers are now chosen in RTE_INIT instead. These default handlers
are used the first time the CRC calculation is called, and they set the
suitable handlers to be used going forward.

Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
Suggested-by: Olivier Matz <olivier.matz@6wind.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4:
  - Added default handlers to be set at RTE_INIT time, rather than
    choosing scalar handlers.
  - Modified logging.
  - Updated enum name.
v3:
  - Moved choosing vector paths out of RTE_INIT.
  - Moved checking max_simd_bitwidth into the set_alg function.
---
 lib/librte_net/rte_net_crc.c | 75 ++++++++++++++++++++++++++++++------
 lib/librte_net/rte_net_crc.h |  8 ++++
 2 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
index 4f5b9e8286..11d0161a32 100644
--- a/lib/librte_net/rte_net_crc.c
+++ b/lib/librte_net/rte_net_crc.c
@@ -9,6 +9,7 @@
 #include <rte_cpuflags.h>
 #include <rte_common.h>
 #include <rte_net_crc.h>
+#include <rte_eal.h>
 
 #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__)
 #define X86_64_SSE42_PCLMULQDQ     1
@@ -32,6 +33,12 @@
 static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
 static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
 
+static uint32_t
+rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len);
+
+static uint32_t
+rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len);
+
 static uint32_t
 rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
 
@@ -41,7 +48,12 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
 typedef uint32_t
 (*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
 
-static rte_net_crc_handler *handlers;
+static rte_net_crc_handler handlers_default[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_default_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_default_handler,
+};
+
+static rte_net_crc_handler *handlers = handlers_default;
 
 static rte_net_crc_handler handlers_scalar[] = {
 	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
@@ -60,6 +72,9 @@ static rte_net_crc_handler handlers_neon[] = {
 };
 #endif
 
+static uint16_t max_simd_bitwidth;
+RTE_LOG_REGISTER(libnet_logtype, lib.net, INFO);
+
 /**
  * Reflect the bits about the middle
  *
@@ -112,6 +127,42 @@ crc32_eth_calc_lut(const uint8_t *data,
 	return crc;
 }
 
+static uint32_t
+rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
+{
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+	handlers = handlers_scalar;
+#ifdef X86_64_SSE42_PCLMULQDQ
+	if (max_simd_bitwidth >= RTE_SIMD_128)
+		handlers = handlers_sse42;
+#endif
+#ifdef ARM64_NEON_PMULL
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+			max_simd_bitwidth >= RTE_SIMD_128) {
+		handlers = handlers_neon;
+#endif
+	return handlers[RTE_NET_CRC16_CCITT](data, data_len);
+}
+
+static uint32_t
+rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
+{
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+	handlers = handlers_scalar;
+#ifdef X86_64_SSE42_PCLMULQDQ
+	if (max_simd_bitwidth >= RTE_SIMD_128)
+		handlers = handlers_sse42;
+#endif
+#ifdef ARM64_NEON_PMULL
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+			max_simd_bitwidth >= RTE_SIMD_128) {
+		handlers = handlers_neon;
+#endif
+	return handlers[RTE_NET_CRC32_ETH](data, data_len);
+}
+
 static void
 rte_net_crc_scalar_init(void)
 {
@@ -145,18 +196,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
 void
 rte_net_crc_set_alg(enum rte_net_crc_alg alg)
 {
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+
 	switch (alg) {
 #ifdef X86_64_SSE42_PCLMULQDQ
 	case RTE_NET_CRC_SSE42:
-		handlers = handlers_sse42;
-		break;
+		if (max_simd_bitwidth >= RTE_SIMD_128) {
+			handlers = handlers_sse42;
+			return;
+		}
+		NET_LOG(INFO, "Max SIMD Bitwidth too low, can't use SSE\n");
 #elif defined ARM64_NEON_PMULL
 		/* fall-through */
 	case RTE_NET_CRC_NEON:
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+				max_simd_bitwidth >= RTE_SIMD_128) {
 			handlers = handlers_neon;
-			break;
+			return;
 		}
+		NET_LOG(INFO, "Max SIMD Bitwidth too low or CPU flag not enabled, can't use NEON\n");
 #endif
 		/* fall-through */
 	case RTE_NET_CRC_SCALAR:
@@ -184,19 +243,13 @@ rte_net_crc_calc(const void *data,
 /* Select highest available crc algorithm as default one */
 RTE_INIT(rte_net_crc_init)
 {
-	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
-
 	rte_net_crc_scalar_init();
 
 #ifdef X86_64_SSE42_PCLMULQDQ
-	alg = RTE_NET_CRC_SSE42;
 	rte_net_crc_sse42_init();
 #elif defined ARM64_NEON_PMULL
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
-		alg = RTE_NET_CRC_NEON;
 		rte_net_crc_neon_init();
 	}
 #endif
-
-	rte_net_crc_set_alg(alg);
 }
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
index 16e85ca970..c942865ecf 100644
--- a/lib/librte_net/rte_net_crc.h
+++ b/lib/librte_net/rte_net_crc.h
@@ -7,6 +7,8 @@
 
 #include <stdint.h>
 
+#include <rte_log.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -25,6 +27,12 @@ enum rte_net_crc_alg {
 	RTE_NET_CRC_NEON,
 };
 
+extern int libnet_logtype;
+
+#define NET_LOG(level, fmt, args...)					\
+	rte_log(RTE_LOG_ ## level, libnet_logtype, "%s(): " fmt "\n",	\
+		__func__, ## args)
+
 /**
  * This API set the CRC computation algorithm (i.e. scalar version,
  * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v4 17/17] node: choose vector path at runtime
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (15 preceding siblings ...)
  2020-10-13 10:38   ` [dpdk-dev] [PATCH v4 16/17] net: " Ciara Power
@ 2020-10-13 10:38   ` Ciara Power
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 10:38 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, --dry-run, Ciara Power, Nithin Dabilpuram,
	Pavan Nikhilesh, Kiran Kumar K

When choosing the vector path, max SIMD bitwidth is now checked to
ensure the vector path is suitable. To do this, rather than the
scalar/vector lookup functions being called directly from the apps, a
generic function is called which will then call the scalar or vector
lookup function.

Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Kiran Kumar K <kirankumark@marvell.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
 lib/librte_node/ip4_lookup_neon.h |  2 +-
 lib/librte_node/ip4_lookup_sse.h  |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
index 293c77f39e..b3edbc1f4d 100644
--- a/lib/librte_node/ip4_lookup.c
+++ b/lib/librte_node/ip4_lookup.c
@@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
-#else
+#endif
 
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_ipv4_hdr *ipv4_hdr;
@@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
 	return nb_objs;
 }
 
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
+	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
 #endif
+	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
+}
 
 int
 rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
index 5e5a7d87be..0ad2763b82 100644
--- a/lib/librte_node/ip4_lookup_neon.h
+++ b/lib/librte_node/ip4_lookup_neon.h
@@ -7,7 +7,7 @@
 
 /* ARM64 NEON */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
index a071cc5919..264c986071 100644
--- a/lib/librte_node/ip4_lookup_sse.h
+++ b/lib/librte_node/ip4_lookup_sse.h
@@ -7,7 +7,7 @@
 
 /* X86 SSE */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL
  2020-08-07 15:58 [dpdk-dev] [PATCH 20.11 00/12] add max SIMD bitwidth to EAL Ciara Power
                   ` (16 preceding siblings ...)
  2020-10-13 10:38 ` [dpdk-dev] [PATCH v4 00/17] add max SIMD bitwidth to EAL Ciara Power
@ 2020-10-13 11:04 ` Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth Ciara Power
                     ` (16 more replies)
  2020-10-15 10:37 ` [dpdk-dev] [PATCH v6 00/18] add max SIMD bitwidth to EAL Ciara Power
                   ` (3 subsequent siblings)
  21 siblings, 17 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power

A number of components in DPDK have optional AVX-512 or other vector
code paths which can be selected at runtime. Rather than having each
component provide its own mechanism to select a code path, this patchset
adds support for a single setting to control what code paths are used.
This can be used to enable some non-default code paths e.g. ones using
AVX-512, but also to limit the code paths to certain vector widths, or
to scalar code only, which is useful for testing.

The max SIMD bitwidth setting can be set by the app itself through use of
the available API, or can be overridden by a command-line argument passed by
the user.

v5: Fixed cc list.
v4:
  - Dropped LPM patch as the lookupx4 function is called from SSE/NEON headers,
    so is already on a vectorised path. Given the performance impact for an
    unnecessary change, it was decided the check is not needed.
  - Renamed enum values for readability.
  - Added patch to add check for node library.
  - Reworked net patch to choose default handlers rather than scalar by default.
  - Updated some Doxygen comments.
  - Fixed some other small comments on v3.
v3:
  - Added patch to add check for LPM lib
  - Modified default max bitwidth for Arm to disable max SIMD bitwidth,
    which will allow for SVE.
  - Added "0" as an acceptable value for command-line flag, which internally
    is used as UINT16_MAX to essentially disable max SIMD bitwidth limits.
  - Made suggested changes to net lib patch.
  - Rebased onto main.
v2:
  - Added some documentation.
  - Modified default max bitwidth for Arm.
  - Moved mlx5 condition check into existing check vec support function.
  - Added max SIMD bitwidth checks to some libraries.

Ciara Power (17):
  eal: add max SIMD bitwidth
  doc: add detail on using max SIMD bitwidth
  net/i40e: add checks for max SIMD bitwidth
  net/axgbe: add checks for max SIMD bitwidth
  net/bnxt: add checks for max SIMD bitwidth
  net/enic: add checks for max SIMD bitwidth
  net/fm10k: add checks for max SIMD bitwidth
  net/iavf: add checks for max SIMD bitwidth
  net/ice: add checks for max SIMD bitwidth
  net/ixgbe: add checks for max SIMD bitwidth
  net/mlx5: add checks for max SIMD bitwidth
  net/virtio: add checks for max SIMD bitwidth
  distributor: add checks for max SIMD bitwidth
  member: add checks for max SIMD bitwidth
  efd: add checks for max SIMD bitwidth
  net: add checks for max SIMD bitwidth
  node: choose vector path at runtime

 doc/guides/howto/avx512.rst                   | 36 +++++++++
 doc/guides/howto/index.rst                    |  1 +
 doc/guides/linux_gsg/eal_args.include.rst     | 16 ++++
 .../prog_guide/env_abstraction_layer.rst      | 32 ++++++++
 drivers/net/axgbe/axgbe_rxtx.c                |  3 +-
 drivers/net/bnxt/bnxt_ethdev.c                |  6 +-
 drivers/net/enic/enic_rxtx_vec_avx2.c         |  3 +-
 drivers/net/fm10k/fm10k_ethdev.c              | 11 ++-
 drivers/net/i40e/i40e_rxtx.c                  | 18 +++--
 drivers/net/iavf/iavf_rxtx.c                  | 16 ++--
 drivers/net/ice/ice_rxtx.c                    | 20 +++--
 drivers/net/ixgbe/ixgbe_rxtx.c                |  5 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c              |  2 +
 drivers/net/virtio/virtio_ethdev.c            |  9 ++-
 lib/librte_distributor/rte_distributor.c      |  3 +-
 lib/librte_eal/arm/include/rte_vect.h         |  2 +
 lib/librte_eal/common/eal_common_options.c    | 66 ++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h      |  8 ++
 lib/librte_eal/common/eal_options.h           |  2 +
 lib/librte_eal/include/rte_eal.h              | 40 ++++++++++
 lib/librte_eal/ppc/include/rte_vect.h         |  2 +
 lib/librte_eal/rte_eal_exports.def            |  2 +
 lib/librte_eal/rte_eal_version.map            |  2 +
 lib/librte_eal/x86/include/rte_vect.h         |  2 +
 lib/librte_efd/rte_efd.c                      |  7 +-
 lib/librte_member/rte_member_ht.c             |  3 +-
 lib/librte_net/rte_net_crc.c                  | 75 ++++++++++++++++---
 lib/librte_net/rte_net_crc.h                  |  8 ++
 lib/librte_node/ip4_lookup.c                  | 13 +++-
 lib/librte_node/ip4_lookup_neon.h             |  2 +-
 lib/librte_node/ip4_lookup_sse.h              |  2 +-
 31 files changed, 371 insertions(+), 46 deletions(-)
 create mode 100644 doc/guides/howto/avx512.rst

-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
@ 2020-10-13 11:04   ` Ciara Power
  2020-10-13 11:58     ` Ananyev, Konstantin
  2020-10-14  8:50     ` Ruifeng Wang
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 02/17] doc: add detail on using " Ciara Power
                     ` (15 subsequent siblings)
  16 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Honnappa Nagarahalli,
	Dmitry Kozlyuk, Narcisa Ana Maria Vasile, Dmitry Malloy,
	Pallavi Kadam, Ray Kinsella, Neil Horman

This patch adds a max SIMD bitwidth EAL configuration. The API allows
for an app to set this value. It can also be set using EAL argument
--force-max-simd-bitwidth, which will lock the value and override any
modifications made by the app.

Each arch has a define for the default SIMD bitwidth value, which is used
on EAL init to set the config max SIMD bitwidth.

Cc: Ruifeng Wang <ruifeng.wang@arm.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Cc: David Christensen <drc@linux.vnet.ibm.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4:
  - Used RTE_SIMD_MAX instead of UINT16_MAX.
  - Renamed enums to better reflect usage.
  - Added functions to windows symbol export file.
  - Modified Doxygen comments.
  - Modified enum name.
  - Changed RTE_SIMD_MAX value to a power of 2.
  - Merged patch 2 into this patch.
  - Enum now used for default value defines.
  - Fixed some small comments on v3.
v3:
  - Added enum value to essentially disable using max SIMD to choose
    paths, intended for use by ARM SVE.
  - Fixed parsing bitwidth argument to return an error for values
    greater than uint16_t.
  - Removed unnecessary define in generic rte_vect.h
  - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
v2:
  - Added to Doxygen comment for API.
  - Changed default bitwidth for Arm to 128.
---
 lib/librte_eal/arm/include/rte_vect.h      |  2 +
 lib/librte_eal/common/eal_common_options.c | 66 ++++++++++++++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/include/rte_eal.h           | 40 +++++++++++++
 lib/librte_eal/ppc/include/rte_vect.h      |  2 +
 lib/librte_eal/rte_eal_exports.def         |  2 +
 lib/librte_eal/rte_eal_version.map         |  2 +
 lib/librte_eal/x86/include/rte_vect.h      |  2 +
 9 files changed, 126 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index 01c51712a1..f53c89be97 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -14,6 +14,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_MAX
+
 typedef int32x4_t xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index a5426e1234..8c79f1b2fc 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -35,6 +35,7 @@
 #ifndef RTE_EXEC_ENV_WINDOWS
 #include <rte_telemetry.h>
 #endif
+#include <rte_vect.h>
 
 #include "eal_internal_cfg.h"
 #include "eal_options.h"
@@ -102,6 +103,7 @@ eal_long_options[] = {
 	{OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
+	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
 	{0,                     0, NULL, 0                        }
 };
 
@@ -343,6 +345,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
 	CPU_ZERO(&internal_cfg->ctrl_cpuset);
 	internal_cfg->init_complete = 0;
+	internal_cfg->max_simd_bitwidth.bitwidth = RTE_DEFAULT_SIMD_BITWIDTH;
+	internal_cfg->max_simd_bitwidth.forced = 0;
 }
 
 static int
@@ -1309,6 +1313,34 @@ eal_parse_iova_mode(const char *name)
 	return 0;
 }
 
+static int
+eal_parse_simd_bitwidth(const char *arg)
+{
+	char *end;
+	unsigned long bitwidth;
+	int ret;
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	bitwidth = strtoul(arg, &end, 0);
+
+	/* check for errors */
+	if (errno != 0 || end == NULL || *end != '\0' || bitwidth > RTE_SIMD_MAX)
+		return -1;
+
+	if (bitwidth == 0)
+		bitwidth = (unsigned long) RTE_SIMD_MAX;
+	ret = rte_set_max_simd_bitwidth(bitwidth);
+	if (ret < 0)
+		return -1;
+	internal_conf->max_simd_bitwidth.forced = 1;
+	return 0;
+}
+
 static int
 eal_parse_base_virtaddr(const char *arg)
 {
@@ -1707,6 +1739,13 @@ eal_parse_common_option(int opt, const char *optarg,
 	case OPT_NO_TELEMETRY_NUM:
 		conf->no_telemetry = 1;
 		break;
+	case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
+		if (eal_parse_simd_bitwidth(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_FORCE_MAX_SIMD_BITWIDTH "\n");
+			return -1;
+		}
+		break;
 
 	/* don't know what to do, leave this to caller */
 	default:
@@ -1903,6 +1942,32 @@ eal_check_common_options(struct internal_config *internal_cfg)
 	return 0;
 }
 
+uint16_t
+rte_get_max_simd_bitwidth(void)
+{
+	const struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	return internal_conf->max_simd_bitwidth.bitwidth;
+}
+
+int
+rte_set_max_simd_bitwidth(uint16_t bitwidth)
+{
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	if (internal_conf->max_simd_bitwidth.forced) {
+		RTE_LOG(NOTICE, EAL, "Cannot set max SIMD bitwidth - user runtime override enabled");
+		return -EPERM;
+	}
+
+	if (bitwidth < RTE_SIMD_DISABLED || !rte_is_power_of_2(bitwidth)) {
+		RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n");
+		return -EINVAL;
+	}
+	internal_conf->max_simd_bitwidth.bitwidth = bitwidth;
+	return 0;
+}
+
 void
 eal_common_usage(void)
 {
@@ -1981,6 +2046,7 @@ eal_common_usage(void)
 	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
 	       "  --"OPT_TELEMETRY"   Enable telemetry support (on by default)\n"
 	       "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
+	       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD bitwidth\n"
 	       "\nEAL options for DEBUG use only:\n"
 	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
 	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 13f93388a7..0c880cbe17 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -33,6 +33,12 @@ struct hugepage_info {
 	int lock_descriptor;    /**< file descriptor for hugepage dir */
 };
 
+struct simd_bitwidth {
+	bool forced;
+	/**< flag indicating if bitwidth is forced and can't be modified */
+	uint16_t bitwidth; /**< bitwidth value */
+};
+
 /**
  * internal configuration
  */
@@ -85,6 +91,8 @@ struct internal_config {
 	volatile unsigned int init_complete;
 	/**< indicates whether EAL has completed initialization */
 	unsigned int no_telemetry; /**< true to disable Telemetry */
+	struct simd_bitwidth max_simd_bitwidth;
+	/**< max simd bitwidth path to use */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 89769d48b4..ef33979664 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -85,6 +85,8 @@ enum {
 	OPT_TELEMETRY_NUM,
 #define OPT_NO_TELEMETRY      "no-telemetry"
 	OPT_NO_TELEMETRY_NUM,
+#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
+	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index e3c2ef185e..706d3cca5a 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -43,6 +43,23 @@ enum rte_proc_type_t {
 	RTE_PROC_INVALID
 };
 
+/**
+ * The max SIMD bitwidth value to limit vector path selection.
+ */
+enum rte_max_simd {
+	RTE_SIMD_DISABLED = 64,
+	/**< Limits path selection to scalar, disables all vector paths. */
+	RTE_SIMD_128 = 128,
+	/**< Limits path selection to SSE/NEON/Altivec or below. */
+	RTE_SIMD_256 = 256, /**< Limits path selection to AVX2 or below. */
+	RTE_SIMD_512 = 512, /**< Limits path selection to AVX512 or below. */
+	RTE_SIMD_MAX = INT16_MAX + 1,
+	/**<
+	 * Disables limiting by max SIMD bitwidth, allows all suitable paths.
+	 * This value is used as it is a large number and a power of 2.
+	 */
+};
+
 /**
  * Get the process type in a multi-process setup
  *
@@ -51,6 +68,29 @@ enum rte_proc_type_t {
  */
 enum rte_proc_type_t rte_eal_process_type(void);
 
+/**
+ * Get the supported SIMD bitwidth.
+ *
+ * @return
+ *   uint16_t bitwidth.
+ */
+__rte_experimental
+uint16_t rte_get_max_simd_bitwidth(void);
+
+/**
+ * Set the supported SIMD bitwidth.
+ * This API should only be called once at initialization, before EAL init.
+ *
+ * @param bitwidth
+ *   uint16_t bitwidth.
+ * @return
+ *   - 0 on success.
+ *   - -EINVAL on invalid bitwidth parameter.
+ *   - -EPERM if bitwidth is forced.
+ */
+__rte_experimental
+int rte_set_max_simd_bitwidth(uint16_t bitwidth);
+
 /**
  * Request iopl privilege for all RPL.
  *
diff --git a/lib/librte_eal/ppc/include/rte_vect.h b/lib/librte_eal/ppc/include/rte_vect.h
index b0545c878c..a69aabc568 100644
--- a/lib/librte_eal/ppc/include/rte_vect.h
+++ b/lib/librte_eal/ppc/include/rte_vect.h
@@ -15,6 +15,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_256
+
 typedef vector signed int xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
diff --git a/lib/librte_eal/rte_eal_exports.def b/lib/librte_eal/rte_eal_exports.def
index 7b35beb702..81e99b00d9 100644
--- a/lib/librte_eal/rte_eal_exports.def
+++ b/lib/librte_eal/rte_eal_exports.def
@@ -26,6 +26,7 @@ EXPORTS
 	rte_eal_tailq_register
 	rte_eal_using_phys_addrs
 	rte_free
+	rte_get_max_simd_bitwidth
 	rte_get_tsc_hz
 	rte_hexdump
 	rte_intr_rx_ctl
@@ -62,6 +63,7 @@ EXPORTS
 	rte_memzone_reserve_aligned
 	rte_memzone_reserve_bounded
 	rte_memzone_walk
+	rte_set_max_simd_bitwidth
 	rte_socket_id
 	rte_strerror
 	rte_strsplit
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index a93dea9fe6..714be49377 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -400,6 +400,8 @@ EXPERIMENTAL {
 	# added in 20.11
 	__rte_eal_trace_generic_size_t;
 	rte_service_lcore_may_be_active;
+	rte_get_max_simd_bitwidth;
+	rte_set_max_simd_bitwidth;
 };
 
 INTERNAL {
diff --git a/lib/librte_eal/x86/include/rte_vect.h b/lib/librte_eal/x86/include/rte_vect.h
index df5a607623..a00d3d5a62 100644
--- a/lib/librte_eal/x86/include/rte_vect.h
+++ b/lib/librte_eal/x86/include/rte_vect.h
@@ -35,6 +35,8 @@
 extern "C" {
 #endif
 
+#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_256
+
 typedef __m128i xmm_t;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 02/17] doc: add detail on using max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-14  8:24     ` Ruifeng Wang
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 03/17] net/i40e: add checks for " Ciara Power
                     ` (14 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Anatoly Burakov, John McNamara,
	Marko Kovacevic

This patch adds documentation on the usage of the max SIMD bitwidth EAL
setting, and how to use it to enable AVX-512 at runtime.

Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: John McNamara <john.mcnamara@intel.com>
Cc: Marko Kovacevic <marko.kovacevic@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated docs to reflect renamed enum.
v3:
  - Added enum value for disabling use of max SIMD to doc.
  - Added entry to HowTo index.
---
 doc/guides/howto/avx512.rst                   | 36 +++++++++++++++++++
 doc/guides/howto/index.rst                    |  1 +
 doc/guides/linux_gsg/eal_args.include.rst     | 16 +++++++++
 .../prog_guide/env_abstraction_layer.rst      | 32 +++++++++++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 doc/guides/howto/avx512.rst

diff --git a/doc/guides/howto/avx512.rst b/doc/guides/howto/avx512.rst
new file mode 100644
index 0000000000..6eb3755775
--- /dev/null
+++ b/doc/guides/howto/avx512.rst
@@ -0,0 +1,36 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2020 Intel Corporation.
+
+
+Using AVX-512 with DPDK
+=======================
+
+AVX-512 is not used by default in DPDK, but it can be selected at runtime by apps through the use of the EAL API,
+and by the user with a command-line argument. DPDK has a setting for max SIMD bitwidth,
+which can be modified and will then limit the vector path taken by the code.
+
+
+Using the API in apps
+---------------------
+
+Apps can request that DPDK use AVX-512 at runtime, if it provides improved application performance.
+This can be done by modifying the EAL setting for max SIMD bitwidth to 512, as by default it is 256,
+which does not allow for AVX-512.
+
+.. code-block:: c
+
+   rte_set_max_simd_bitwidth(RTE_SIMD_512);
+
+This API should only be called once at initialization, before EAL init.
+For more information on the possible enum values to use as a parameter, see :ref:`max_simd_bitwidth`.
+
+
+Using the command-line argument
+-------------------------------
+
+The user can select to use AVX-512 at runtime, using the following argument to set the max bitwidth::
+
+   ./app/dpdk-testpmd --force-max-simd-bitwidth=512
+
+This will override any further changes to the max SIMD bitwidth in DPDK,
+which is useful for testing purposes.
diff --git a/doc/guides/howto/index.rst b/doc/guides/howto/index.rst
index 5a97ea508c..c2a2c60ddb 100644
--- a/doc/guides/howto/index.rst
+++ b/doc/guides/howto/index.rst
@@ -20,3 +20,4 @@ HowTo Guides
     telemetry
     debug_troubleshoot
     openwrt
+    avx512
diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 0fe4457968..a0bfbd1a98 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -210,3 +210,19 @@ Other options
 *    ``--no-telemetry``:
 
     Disable telemetry.
+
+*    ``--force-max-simd-bitwidth=<val>``:
+
+    Specify the maximum SIMD bitwidth size to handle. This limits which vector paths,
+    if any, are taken, as any paths taken must use a bitwidth at or below the max bitwidth limit.
+    For example, to allow all SIMD bitwidths up to and including AVX-512::
+
+        --force-max-simd-bitwidth=512
+
+    The following example shows limiting the bitwidth to 64 bits to disable all vector code::
+
+        --force-max-simd-bitwidth=64
+
+    To disable the max SIMD bitwidth limit::
+
+        --force-max-simd-bitwidth=0
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 936c885081..04bb910386 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -486,6 +486,38 @@ the desired addressing mode when virtual devices that are not directly attached
 To facilitate forcing the IOVA mode to a specific value the EAL command line option ``--iova-mode`` can
 be used to select either physical addressing('pa') or virtual addressing('va').
 
+.. _max_simd_bitwidth:
+
+
+Max SIMD bitwidth
+~~~~~~~~~~~~~~~~~
+
+The EAL provides a single setting to limit the max SIMD bitwidth used by DPDK,
+which is used in determining the vector path, if any, chosen by a component.
+The value can be set at runtime by an application using the ``rte_set_max_simd_bitwidth(uint16_t bitwidth)`` function,
+which should only be called once at initialization, before EAL init.
+The value can be overridden by the user using the EAL command-line option ``--force-max-simd-bitwidth``.
+
+When choosing a vector path, along with checking the CPU feature support,
+the value of the max SIMD bitwidth must also be checked, and can be retrieved using the ``rte_get_max_simd_bitwidth()`` function.
+The value should be compared against the enum values for accepted max SIMD bitwidths:
+
+.. code-block:: c
+
+   enum rte_max_simd {
+       RTE_SIMD_DISABLED = 64,
+       RTE_SIMD_128 = 128,
+       RTE_SIMD_256 = 256,
+       RTE_SIMD_512 = 512,
+       RTE_SIMD_MAX = UINT16_MAX,
+   };
+
+   if (rte_get_max_simd_bitwidth() >= RTE_SIMD_512)
+       /* Take AVX-512 vector path */
+   else if (rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
+       /* Take AVX2 vector path */
+
+
 Memory Segments and Memory Zones (memzone)
 ------------------------------------------
 
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 03/17] net/i40e: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 02/17] doc: add detail on using " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 04/17] net/axgbe: " Ciara Power
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Beilei Xing, Jeff Guo

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Beilei Xing <beilei.xing@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

---
v4: Updated enum names.
---
 drivers/net/i40e/i40e_rxtx.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 322fc1ed75..a6644b3efa 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3098,7 +3098,8 @@ static eth_rx_burst_t
 i40e_get_latest_rx_vec(bool scatter)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3115,7 +3116,8 @@ i40e_get_recommend_rx_vec(bool scatter)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
 				 i40e_recv_pkts_vec_avx2;
 #endif
@@ -3154,7 +3156,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 	}
 
-	if (ad->rx_vec_allowed) {
+	if (ad->rx_vec_allowed  && rte_get_max_simd_bitwidth()
+			>= RTE_SIMD_128) {
 		/* Vec Rx path */
 		PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
 				dev->data->port_id);
@@ -3268,7 +3271,8 @@ static eth_tx_burst_t
 i40e_get_latest_tx_vec(void)
 {
 #if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3283,7 +3287,8 @@ i40e_get_recommend_tx_vec(void)
 	 * use of AVX2 version to later plaforms, not all those that could
 	 * theoretically run it.
 	 */
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		return i40e_xmit_pkts_vec_avx2;
 #endif
 	return i40e_xmit_pkts_vec;
@@ -3311,7 +3316,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 	}
 
 	if (ad->tx_simple_allowed) {
-		if (ad->tx_vec_allowed) {
+		if (ad->tx_vec_allowed &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 			PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
 			if (ad->use_latest_vec)
 				dev->tx_pkt_burst =
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 04/17] net/axgbe: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (2 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 03/17] net/i40e: add checks for " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 05/17] net/bnxt: " Ciara Power
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Somalapuram Amaranath

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Somalapuram Amaranath <asomalap@amd.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Amaranath Somalapuram <asomalap@amd.com>

---
v4: Updated enum name.
---
 drivers/net/axgbe/axgbe_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index bc93becaa5..5386bd86f8 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -557,7 +557,8 @@ int axgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!pdata->tx_queues)
 		pdata->tx_queues = dev->data->tx_queues;
 
-	if (txq->vector_disable)
+	if (txq->vector_disable || rte_get_max_simd_bitwidth()
+			< RTE_SIMD_128)
 		dev->tx_pkt_burst = &axgbe_xmit_pkts;
 	else
 #ifdef RTE_ARCH_X86
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 05/17] net/bnxt: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (3 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 04/17] net/axgbe: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 06/17] net/enic: " Ciara Power
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Ajit Khaparde, Somnath Kotur

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/bnxt/bnxt_ethdev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 8b63134c39..07d1a1a6ab 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1169,7 +1169,8 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
 		DEV_RX_OFFLOAD_RSS_HASH |
 		DEV_RX_OFFLOAD_VLAN_FILTER)) &&
-	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp) &&
+		rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		PMD_DRV_LOG(INFO, "Using vector mode receive for port %d\n",
 			    eth_dev->data->port_id);
 		bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
@@ -1202,7 +1203,8 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 	 */
 	if (!eth_dev->data->scattered_rx &&
 	    !(offloads & ~DEV_TX_OFFLOAD_MBUF_FAST_FREE) &&
-	    !BNXT_TRUFLOW_EN(bp)) {
+	    !BNXT_TRUFLOW_EN(bp) &&
+	    rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		PMD_DRV_LOG(INFO, "Using vector mode transmit for port %d\n",
 			    eth_dev->data->port_id);
 		return bnxt_xmit_pkts_vec;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 06/17] net/enic: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (4 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 05/17] net/bnxt: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 07/17] net/fm10k: " Ciara Power
                     ` (10 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, John Daley, Hyong Youb Kim

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: John Daley <johndale@cisco.com>
Cc: Hyong Youb Kim <hyonkim@cisco.com>

Acked-by: Hyong Youb Kim <hyonkim@cisco.com>
Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/enic/enic_rxtx_vec_avx2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
index 676b9f5fdb..75e9172177 100644
--- a/drivers/net/enic/enic_rxtx_vec_avx2.c
+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
@@ -821,7 +821,8 @@ enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
 	fconf = &eth_dev->data->dev_conf.fdir_conf;
 	if (fconf->mode != RTE_FDIR_MODE_NONE)
 		return false;
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256) {
 		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
 		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
 		enic->use_noscatter_vec_rx_handler = 1;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 07/17] net/fm10k: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (5 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 06/17] net/enic: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 08/17] net/iavf: " Ciara Power
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Qi Zhang, Xiao Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qi Zhang <qi.z.zhang@intel.com>
Cc: Xiao Wang <xiao.w.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Qi Zhang <qi.z.zhang@intel.com>

---
v4: Updated enum name.
---
 drivers/net/fm10k/fm10k_ethdev.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c4a6fdf7f0..78c81bf35b 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2937,7 +2937,9 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		/* primary process has set the ftag flag and offloads */
 		txq = dev->data->tx_queues[0];
-		if (fm10k_tx_vec_condition_check(txq)) {
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth()
+				< RTE_SIMD_128) {
 			dev->tx_pkt_burst = fm10k_xmit_pkts;
 			dev->tx_pkt_prepare = fm10k_prep_pkts;
 			PMD_INIT_LOG(DEBUG, "Use regular Tx func");
@@ -2956,7 +2958,8 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 		txq = dev->data->tx_queues[i];
 		txq->tx_ftag_en = tx_ftag_en;
 		/* Check if Vector Tx is satisfied */
-		if (fm10k_tx_vec_condition_check(txq))
+		if (fm10k_tx_vec_condition_check(txq) ||
+				rte_get_max_simd_bitwidth() < RTE_SIMD_128)
 			use_sse = 0;
 	}
 
@@ -2990,7 +2993,9 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met.
 	 */
 	if (!fm10k_rx_vec_condition_check(dev) &&
-			dev_info->rx_vec_allowed && !rx_ftag_en) {
+			dev_info->rx_vec_allowed && !rx_ftag_en &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_SIMD_128) {
 		if (dev->data->scattered_rx)
 			dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
 		else
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 08/17] net/iavf: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (6 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 07/17] net/fm10k: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 09/17] net/ice: " Ciara Power
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Jingjing Wu, Beilei Xing

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Jingjing Wu <jingjing.wu@intel.com>
Cc: Beilei Xing <beilei.xing@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/iavf/iavf_rxtx.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 1b0efe0433..7c27d5beec 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2104,14 +2104,16 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_rx_vec_dev_check(dev)) {
+	if (!iavf_rx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
 			rxq = dev->data->rx_queues[i];
 			(void)iavf_rxq_vec_setup(rxq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 			use_avx2 = true;
 
 		if (dev->data->scattered_rx) {
@@ -2177,7 +2179,8 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	int i;
 	bool use_avx2 = false;
 
-	if (!iavf_tx_vec_dev_check(dev)) {
+	if (!iavf_tx_vec_dev_check(dev) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_128) {
 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
 			txq = dev->data->tx_queues[i];
 			if (!txq)
@@ -2185,8 +2188,9 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 			iavf_txq_vec_setup(txq);
 		}
 
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+		if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 			use_avx2 = true;
 
 		PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 09/17] net/ice: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (7 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 08/17] net/iavf: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 12:11     ` Zhang, Qi Z
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 10/17] net/ixgbe: " Ciara Power
                     ` (7 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Qiming Yang, Qi Zhang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Qiming Yang <qiming.yang@intel.com>
Cc: Qi Zhang <qi.z.zhang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
---
 drivers/net/ice/ice_rxtx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 93a0ac6918..0003ce2afe 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2989,7 +2989,9 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed) {
+		if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_SIMD_128) {
 			ad->rx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
 				rxq = dev->data->rx_queues[i];
@@ -2999,8 +3001,10 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_SIMD_256)
 				use_avx2 = true;
 
 		} else {
@@ -3167,7 +3171,9 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 	bool use_avx2 = false;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (!ice_tx_vec_dev_check(dev)) {
+		if (!ice_tx_vec_dev_check(dev) &&
+				rte_get_max_simd_bitwidth()
+				>= RTE_SIMD_128) {
 			ad->tx_vec_allowed = true;
 			for (i = 0; i < dev->data->nb_tx_queues; i++) {
 				txq = dev->data->tx_queues[i];
@@ -3177,8 +3183,10 @@ ice_set_tx_function(struct rte_eth_dev *dev)
 				}
 			}
 
-			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+					rte_get_max_simd_bitwidth()
+					>= RTE_SIMD_256)
 				use_avx2 = true;
 
 		} else {
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 10/17] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (8 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 09/17] net/ice: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:20     ` Wang, Haiyue
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 11/17] net/mlx5: " Ciara Power
                     ` (6 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Wei Zhao, Jeff Guo, Haiyue Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Wei Zhao <wei.zhao1@intel.com>
Cc: Jeff Guo <jia.guo@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

---
v4:
  - Updated enum name.
  - Moved placement of condition check.
  - Added condition check to tx cleanup path selection.
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 977ecf5137..d371647c0e 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -2404,6 +2404,7 @@ ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
 #endif
 			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_128 &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					txq->sw_ring_v != NULL)) {
 			return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
@@ -2502,6 +2503,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 		PMD_INIT_LOG(DEBUG, "Using simple tx code path");
 		dev->tx_pkt_prepare = NULL;
 		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
+				rte_get_max_simd_bitwidth() >= RTE_SIMD_128 &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					ixgbe_txq_vec_setup(txq) == 0)) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
@@ -4743,7 +4745,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	 * conditions to be met and Rx Bulk Allocation should be allowed.
 	 */
 	if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
-	    !adapter->rx_bulk_alloc_allowed) {
+	    !adapter->rx_bulk_alloc_allowed ||
+			rte_get_max_simd_bitwidth() < RTE_SIMD_128) {
 		PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
 				    "preconditions",
 			     dev->data->port_id);
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 11/17] net/mlx5: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (9 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 10/17] net/ixgbe: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 12/17] net/virtio: " Ciara Power
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Matan Azrad, Shahaf Shuler,
	Viacheslav Ovsiienko, Viacheslav Ovsiienko, Matan Azrad,
	Shahaf Shuler

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Matan Azrad <matan@mellanox.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>
Cc: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

---
v4: Updated enum name.
v2: Moved check for max bitwidth into existing check vec
    support function.
---
 drivers/net/mlx5/mlx5_rxtx_vec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 711dcd35fa..49f1b61ff8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -148,6 +148,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	uint32_t i;
 
+	if (rte_get_max_simd_bitwidth() < RTE_SIMD_128)
+		return -ENOTSUP;
 	if (!priv->config.rx_vec_en)
 		return -ENOTSUP;
 	if (mlx5_mprq_enabled(dev))
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 12/17] net/virtio: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (10 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 11/17] net/mlx5: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-14  2:02     ` Xia, Chenbo
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 13/17] distributor: " Ciara Power
                     ` (4 subsequent siblings)
  16 siblings, 1 reply; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Maxime Coquelin, Chenbo Xia,
	Zhihong Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: Chenbo Xia <chenbo.xia@intel.com>
Cc: Zhihong Wang <zhihong.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4: Updated enum name.
v3: Moved max SIMD bitwidth check to configure function with other vec
    support checks.
---
 drivers/net/virtio/virtio_ethdev.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 0236c756dc..70955e5cc0 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -2313,7 +2313,8 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
 		     !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
-		     !vtpci_with_feature(hw, VIRTIO_F_VERSION_1))) {
+		     !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
+		     rte_get_max_simd_bitwidth() < RTE_SIMD_512)) {
 			PMD_DRV_LOG(INFO,
 				"disabled packed ring vectorized path for requirements not met");
 			hw->use_vec_rx = 0;
@@ -2366,6 +2367,12 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 					"disabled split ring vectorized rx for offloading enabled");
 				hw->use_vec_rx = 0;
 			}
+
+			if (rte_get_max_simd_bitwidth() < RTE_SIMD_128) {
+				PMD_DRV_LOG(INFO,
+					"disabled split ring vectorized rx, max SIMD bitwidth too low");
+				hw->use_vec_rx = 0;
+			}
 		}
 	}
 
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 13/17] distributor: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (11 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 12/17] net/virtio: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 14/17] member: " Ciara Power
                     ` (3 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, David Hunt

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: David Hunt <david.hunt@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: David Hunt <david.hunt@intel.com>

---
v4: Updated enum name.
---
 lib/librte_distributor/rte_distributor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index 1c047f065a..05e61dddfc 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -636,7 +636,8 @@ rte_distributor_create(const char *name,
 
 	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
 #if defined(RTE_ARCH_X86)
-	d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
+	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
 #endif
 
 	/*
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 14/17] member: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (12 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 13/17] distributor: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 15/17] efd: " Ciara Power
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Yipeng Wang, Sameh Gobriel

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU
enabled path.

Cc: Yipeng Wang <yipeng1.wang@intel.com>
Cc: Sameh Gobriel <sameh.gobriel@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>

---
v4: Updated enum name.
---
 lib/librte_member/rte_member_ht.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
index 3ea293a094..98c8aac248 100644
--- a/lib/librte_member/rte_member_ht.c
+++ b/lib/librte_member/rte_member_ht.c
@@ -113,7 +113,8 @@ rte_member_create_ht(struct rte_member_setsum *ss,
 	}
 #if defined(RTE_ARCH_X86)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
-			RTE_MEMBER_BUCKET_ENTRIES == 16)
+			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
 	else
 #endif
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 15/17] efd: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (13 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 14/17] member: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 16/17] net: " Ciara Power
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 17/17] node: choose vector path at runtime Ciara Power
  16 siblings, 0 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Byron Marohn, Yipeng Wang

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

Cc: Byron Marohn <byron.marohn@intel.com>
Cc: Yipeng Wang <yipeng1.wang@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>

---
v4: Updated enum name.
---
 lib/librte_efd/rte_efd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index 6a799556d4..e925b73a9c 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -645,7 +645,9 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * For less than 4 bits, scalar function performs better
 	 * than vectorised version
 	 */
-	if (RTE_EFD_VALUE_NUM_BITS > 3 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+	if (RTE_EFD_VALUE_NUM_BITS > 3
+			&& rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)
+			&& rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
 		table->lookup_fn = EFD_LOOKUP_AVX2;
 	else
 #endif
@@ -655,7 +657,8 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
 	 * than vectorised version
 	 */
 	if (RTE_EFD_VALUE_NUM_BITS > 16 &&
-	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
+			rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
 		table->lookup_fn = EFD_LOOKUP_NEON;
 	else
 #endif
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 16/17] net: add checks for max SIMD bitwidth
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (14 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 15/17] efd: " Ciara Power
@ 2020-10-13 11:04   ` " Ciara Power
  2020-10-13 11:32     ` Olivier Matz
  2020-10-13 13:07     ` Ananyev, Konstantin
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 17/17] node: choose vector path at runtime Ciara Power
  16 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Jasvinder Singh, Olivier Matz

When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path.

The vector path was initially chosen in RTE_INIT, however this is no
longer suitable as we cannot check the max SIMD bitwidth at that time.
Default handlers are now chosen in RTE_INIT, these default handlers
are used the first time the crc calc is called, and they set the suitable
handlers to be used going forward.

Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
Suggested-by: Olivier Matz <olivier.matz@6wind.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>

---
v4:
  - Added default handlers to be set at RTE_INIT time, rather than
    choosing scalar handlers.
  - Modified logging.
  - Updated enum name.
v3:
  - Moved choosing vector paths out of RTE_INIT.
  - Moved checking max_simd_bitwidth into the set_alg function.
---
 lib/librte_net/rte_net_crc.c | 75 ++++++++++++++++++++++++++++++------
 lib/librte_net/rte_net_crc.h |  8 ++++
 2 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
index 4f5b9e8286..11d0161a32 100644
--- a/lib/librte_net/rte_net_crc.c
+++ b/lib/librte_net/rte_net_crc.c
@@ -9,6 +9,7 @@
 #include <rte_cpuflags.h>
 #include <rte_common.h>
 #include <rte_net_crc.h>
+#include <rte_eal.h>
 
 #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__)
 #define X86_64_SSE42_PCLMULQDQ     1
@@ -32,6 +33,12 @@
 static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
 static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
 
+static uint32_t
+rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len);
+
+static uint32_t
+rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len);
+
 static uint32_t
 rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
 
@@ -41,7 +48,12 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
 typedef uint32_t
 (*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
 
-static rte_net_crc_handler *handlers;
+static rte_net_crc_handler handlers_default[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_default_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_default_handler,
+};
+
+static rte_net_crc_handler *handlers = handlers_default;
 
 static rte_net_crc_handler handlers_scalar[] = {
 	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
@@ -60,6 +72,9 @@ static rte_net_crc_handler handlers_neon[] = {
 };
 #endif
 
+static uint16_t max_simd_bitwidth;
+RTE_LOG_REGISTER(libnet_logtype, lib.net, INFO);
+
 /**
  * Reflect the bits about the middle
  *
@@ -112,6 +127,42 @@ crc32_eth_calc_lut(const uint8_t *data,
 	return crc;
 }
 
+static uint32_t
+rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
+{
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+	handlers = handlers_scalar;
+#ifdef X86_64_SSE42_PCLMULQDQ
+	if (max_simd_bitwidth >= RTE_SIMD_128)
+		handlers = handlers_sse42;
+#endif
+#ifdef ARM64_NEON_PMULL
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+			max_simd_bitwidth >= RTE_SIMD_128) {
+		handlers = handlers_neon;
+#endif
+	return handlers[RTE_NET_CRC16_CCITT](data, data_len);
+}
+
+static uint32_t
+rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
+{
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+	handlers = handlers_scalar;
+#ifdef X86_64_SSE42_PCLMULQDQ
+	if (max_simd_bitwidth >= RTE_SIMD_128)
+		handlers = handlers_sse42;
+#endif
+#ifdef ARM64_NEON_PMULL
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+			max_simd_bitwidth >= RTE_SIMD_128) {
+		handlers = handlers_neon;
+#endif
+	return handlers[RTE_NET_CRC32_ETH](data, data_len);
+}
+
 static void
 rte_net_crc_scalar_init(void)
 {
@@ -145,18 +196,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
 void
 rte_net_crc_set_alg(enum rte_net_crc_alg alg)
 {
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+
 	switch (alg) {
 #ifdef X86_64_SSE42_PCLMULQDQ
 	case RTE_NET_CRC_SSE42:
-		handlers = handlers_sse42;
-		break;
+		if (max_simd_bitwidth >= RTE_SIMD_128) {
+			handlers = handlers_sse42;
+			return;
+		}
+		NET_LOG(INFO, "Max SIMD Bitwidth too low, can't use SSE\n");
 #elif defined ARM64_NEON_PMULL
 		/* fall-through */
 	case RTE_NET_CRC_NEON:
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
+				max_simd_bitwidth >= RTE_SIMD_128) {
 			handlers = handlers_neon;
-			break;
+			return;
 		}
+		NET_LOG(INFO, "Max SIMD Bitwidth too low or CPU flag not enabled, can't use NEON\n");
 #endif
 		/* fall-through */
 	case RTE_NET_CRC_SCALAR:
@@ -184,19 +243,13 @@ rte_net_crc_calc(const void *data,
 /* Select highest available crc algorithm as default one */
 RTE_INIT(rte_net_crc_init)
 {
-	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
-
 	rte_net_crc_scalar_init();
 
 #ifdef X86_64_SSE42_PCLMULQDQ
-	alg = RTE_NET_CRC_SSE42;
 	rte_net_crc_sse42_init();
 #elif defined ARM64_NEON_PMULL
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
-		alg = RTE_NET_CRC_NEON;
 		rte_net_crc_neon_init();
 	}
 #endif
-
-	rte_net_crc_set_alg(alg);
 }
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
index 16e85ca970..c942865ecf 100644
--- a/lib/librte_net/rte_net_crc.h
+++ b/lib/librte_net/rte_net_crc.h
@@ -7,6 +7,8 @@
 
 #include <stdint.h>
 
+#include <rte_log.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -25,6 +27,12 @@ enum rte_net_crc_alg {
 	RTE_NET_CRC_NEON,
 };
 
+extern int libnet_logtype;
+
+#define NET_LOG(level, fmt, args...)					\
+	rte_log(RTE_LOG_ ## level, libnet_logtype, "%s(): " fmt "\n",	\
+		__func__, ## args)
+
 /**
  * This API set the CRC computation algorithm (i.e. scalar version,
  * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* [dpdk-dev] [PATCH v5 17/17] node: choose vector path at runtime
  2020-10-13 11:04 ` [dpdk-dev] [PATCH v5 00/17] add max SIMD bitwidth to EAL Ciara Power
                     ` (15 preceding siblings ...)
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 16/17] net: " Ciara Power
@ 2020-10-13 11:04   ` Ciara Power
  2020-10-13 13:42     ` Ananyev, Konstantin
  2020-10-14  8:28     ` Ruifeng Wang
  16 siblings, 2 replies; 276+ messages in thread
From: Ciara Power @ 2020-10-13 11:04 UTC (permalink / raw)
  To: dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Ciara Power, Nithin Dabilpuram,
	Pavan Nikhilesh, Kiran Kumar K

When choosing the vector path, max SIMD bitwidth is now checked to
ensure the vector path is suitable. To do this, rather than the
scalar/vector lookup functions being called directly from the apps, a
generic function is called which will then call the scalar or vector
lookup function.

Cc: Nithin Dabilpuram <ndabilpuram@marvell.com>
Cc: Pavan Nikhilesh <pbhagavatula@marvell.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Kiran Kumar K <kirankumark@marvell.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_node/ip4_lookup.c      | 13 +++++++++++--
 lib/librte_node/ip4_lookup_neon.h |  2 +-
 lib/librte_node/ip4_lookup_sse.h  |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
index 293c77f39e..b3edbc1f4d 100644
--- a/lib/librte_node/ip4_lookup.c
+++ b/lib/librte_node/ip4_lookup.c
@@ -34,10 +34,10 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
-#else
+#endif
 
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_ipv4_hdr *ipv4_hdr;
@@ -109,7 +109,16 @@ ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
 	return nb_objs;
 }
 
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+#if defined(RTE_MACHINE_CPUFLAG_NEON) || defined(RTE_ARCH_X86)
+	if (rte_get_max_simd_bitwidth() >= RTE_SIMD_128)
+		return ip4_lookup_node_process_vec(graph, node, objs, nb_objs);
 #endif
+	return ip4_lookup_node_process_scalar(graph, node, objs, nb_objs);
+}
 
 int
 rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
diff --git a/lib/librte_node/ip4_lookup_neon.h b/lib/librte_node/ip4_lookup_neon.h
index 5e5a7d87be..0ad2763b82 100644
--- a/lib/librte_node/ip4_lookup_neon.h
+++ b/lib/librte_node/ip4_lookup_neon.h
@@ -7,7 +7,7 @@
 
 /* ARM64 NEON */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
diff --git a/lib/librte_node/ip4_lookup_sse.h b/lib/librte_node/ip4_lookup_sse.h
index a071cc5919..264c986071 100644
--- a/lib/librte_node/ip4_lookup_sse.h
+++ b/lib/librte_node/ip4_lookup_sse.h
@@ -7,7 +7,7 @@
 
 /* X86 SSE */
 static uint16_t
-ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
 			void **objs, uint16_t nb_objs)
 {
 	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v5 10/17] net/ixgbe: add checks for max SIMD bitwidth
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 10/17] net/ixgbe: " Ciara Power
@ 2020-10-13 11:20     ` Wang, Haiyue
  0 siblings, 0 replies; 276+ messages in thread
From: Wang, Haiyue @ 2020-10-13 11:20 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, Richardson, Bruce, Ananyev,
	Konstantin, Zhao1, Wei, Guo, Jia

> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Tuesday, October 13, 2020 19:05
> To: dev@dpdk.org
> Cc: viktorin@rehivetech.com; ruifeng.wang@arm.com; jerinj@marvell.com; drc@linux.vnet.ibm.com;
> Richardson, Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Power, Ciara <ciara.power@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Guo, Jia <jia.guo@intel.com>;
> Wang, Haiyue <haiyue.wang@intel.com>
> Subject: [PATCH v5 10/17] net/ixgbe: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> Cc: Wei Zhao <wei.zhao1@intel.com>
> Cc: Jeff Guo <jia.guo@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
> ---
> v4:
>   - Updated enum name.
>   - Moved placement of condition check.
>   - Added condition check to tx cleanup path selection.
> ---
>  drivers/net/ixgbe/ixgbe_rxtx.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)

Reviewed-by: Haiyue Wang <haiyue.wang@intel.com>

> 2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth
  2020-10-08 14:55               ` Ananyev, Konstantin
@ 2020-10-13 11:27                 ` Power, Ciara
  0 siblings, 0 replies; 276+ messages in thread
From: Power, Ciara @ 2020-10-13 11:27 UTC (permalink / raw)
  To: Ananyev, Konstantin, Olivier Matz
  Cc: Coyle, David, Singh, Jasvinder, dev, O'loingsigh, Mairtin,
	Ryan, Brendan, Richardson, Bruce

Hi Konstantin,


>-----Original Message-----
>From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
>Sent: Thursday 8 October 2020 15:55
>To: Olivier Matz <olivier.matz@6wind.com>; Power, Ciara
><ciara.power@intel.com>
>Cc: Coyle, David <david.coyle@intel.com>; Singh, Jasvinder
><jasvinder.singh@intel.com>; dev@dpdk.org; O'loingsigh, Mairtin
><mairtin.oloingsigh@intel.com>; Ryan, Brendan <brendan.ryan@intel.com>;
>Richardson, Bruce <bruce.richardson@intel.com>
>Subject: RE: [dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD
>bitwidth
>
>> > >> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Ciara Power
>> > >> > > When choosing a vector path to take, an extra condition must
>> > >> > > be satisfied to ensure the max SIMD bitwidth allows for the
>> > >> > > CPU enabled
>> > >path.
>> > >> > >
>> > >> > > The vector path was initially chosen in RTE_INIT, however
>> > >> > > this is no longer suitable as we cannot check the max SIMD
>> > >> > > bitwidth at that
>> > >time.
>> > >> > > The default chosen in RTE_INIT is now scalar. For best
>> > >> > > performance and to use vector paths, apps must explicitly
>> > >> > > call the set algorithm function before using other functions
>> > >> > > from this library, as this is where vector handlers are now chosen.
>> > >> >
>> > >> > [DC] Has it been decided that it is ok to now require
>> > >> > applications to pick the CRC algorithm they want to use?
>> > >> >
>> > >> > An application which previously automatically got SSE4.2 CRC,
>> > >> > for example, will now automatically only get scalar.
>> > >> >
>> > >> > If this is ok, this should probably be called out explicitly in
>> > >> > release notes as it may not be Immediately noticeable to users
>> > >> > that they now need to select the CRC algo.
>> > >> >
>> > >> > Actually, in general, the release notes need to be updated for
>> > >> > this
>> > >> patchset.
>> > >>
>> > >> The decision to move rte_set_alg() out of RTE_INIT was taken to
>> > >> avoid check on max_simd_bitwidth in data path for every single
>> > >> time when
>> > >> crc_calc() api is invoked. Based on my understanding,
>> > >> max_simd_bitwidth is set after eal init, and when used in
>> > >> crc_calc(), it might override the default crc algo set during
>> > >> RTE_INIT. Therefore, to avoid extra check on max_simd_bitwidth in
>> > >> data path,  better option will be to use this static
>> > >> configuration one time after eal init in the set_algo
>> > >API.
>> > >
>> > >[DC] Yes that is a good change to have made to avoid extra datapath
>checks.
>> > >
>> > >Based on off-list discussion, I now also know the reason behind now
>> > >defaulting to scalar CRC in RTE_INIT. If a higher bitwidth CRC was
>> > >chosen by RTE_INIT (e.g.
>> > >SSE4.2 CRC) but the max_simd_bitwidth was then set to RTE_NO_SIMD
>> > >(64) through the EAL parameter or call to
>> > >rte_set_max_simd_bitwidth(), then there is a mismatch if
>> > >rte_net_crc_set_alg() is not then called to reconfigure the CRC.
>> > >Defaulting to scalar avoids this mismatch and works on all archs
>> > >
>> > >As I mentioned before, I think this needs to be called out in
>> > >release notes, as it's an under-the-hood change which could cause
>> > >app performance to drop if app developers aren't aware of it - the
>> > >API itself hasn't changed, so they may not read the doxygen :)
>> > >
>> >
>> > Yes that is a good point, I can add to the release notes for this to call it
>out.
>>
>> I don't think it is a good idea to have the scalar crc by default.
>> To me, the fastest available CRC has to be enabled by default.
>>
>> I understand the technical reason why you did it like this however:
>> the SIMD bitwidth may not be known at the time the
>> RTE_INIT(rte_net_crc_init) function is called.
>>
>> A simple approach to solve this issue would be to initialize the
>> rte_net_crc_handler pointer to a handlers_default. The first time a
>> crc is called, the rte_crc32_*_default_handler() function would check
>> the configured SIMD bitwidth, and set the handler to the correct one,
>> to avoid to do the test for next time.
>>
>> This approach still does not solve the case where the SIMD bitwidth is
>> modified during the life of the application. In this case, a callback
>> would have to be registered to notify SIMD bitwidth changes... but I
>> don't think it is worth to do it. Instead, it can be documented that
>> rte_set_max_simd_bitwidth() has to be called early, before
>> rte_eal_init().
>
>Actually I also thought about callback approach.
>It does complicate things a bit for sure, but on a positive side - it allows to
>solve RTE_INIT() code-path selection problem in a generic way, plus it means
>zero changes in the data-path.
>So probably worth to consider it.
>

I am not sure adding callbacks to allow for runtime changes to max SIMD bitwidth is worth it.
I have sent a new version of my patchset which currently does not have this suggested rework to use callbacks.

Thanks,
Ciara

<snip>

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v5 16/17] net: add checks for max SIMD bitwidth
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 16/17] net: " Ciara Power
@ 2020-10-13 11:32     ` Olivier Matz
  2020-10-13 13:07     ` Ananyev, Konstantin
  1 sibling, 0 replies; 276+ messages in thread
From: Olivier Matz @ 2020-10-13 11:32 UTC (permalink / raw)
  To: Ciara Power
  Cc: dev, viktorin, ruifeng.wang, jerinj, drc, bruce.richardson,
	konstantin.ananyev, Jasvinder Singh

Hi Ciara,

On Tue, Oct 13, 2020 at 12:04:36PM +0100, Ciara Power wrote:
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> The vector path was initially chosen in RTE_INIT, however this is no
> longer suitable as we cannot check the max SIMD bitwidth at that time.
> Default handlers are now chosen in RTE_INIT, these default handlers
> are used the first time the crc calc is called, and they set the suitable
> handlers to be used going forward.
> 
> Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 

[...]

> --- a/lib/librte_net/rte_net_crc.h
> +++ b/lib/librte_net/rte_net_crc.h
> @@ -7,6 +7,8 @@
>  
>  #include <stdint.h>
>  
> +#include <rte_log.h>
> +
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
> @@ -25,6 +27,12 @@ enum rte_net_crc_alg {
>  	RTE_NET_CRC_NEON,
>  };
>  
> +extern int libnet_logtype;
> +
> +#define NET_LOG(level, fmt, args...)					\
> +	rte_log(RTE_LOG_ ## level, libnet_logtype, "%s(): " fmt "\n",	\
> +		__func__, ## args)
> +
>  /**
>   * This API set the CRC computation algorithm (i.e. scalar version,
>   * x86 64-bit sse4.2 intrinsic version, etc.) and internal data

We should not expose this log macro and log type in a public header file.
They can stay in the .c file. In the future, we may want to expose them in
a private header, but not in a public API.

Regards,
Olivier

^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth Ciara Power
@ 2020-10-13 11:58     ` Ananyev, Konstantin
  2020-10-14  8:50     ` Ruifeng Wang
  1 sibling, 0 replies; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-13 11:58 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, Richardson, Bruce,
	Honnappa Nagarahalli, Dmitry Kozlyuk, Narcisa Ana Maria Vasile,
	Dmitry Malloy, Kadam, Pallavi, Ray Kinsella, Neil Horman


> 
> This patch adds a max SIMD bitwidth EAL configuration. The API allows
> for an app to set this value. It can also be set using EAL argument
> --force-max-simd-bitwidth, which will lock the value and override any
> modifications made by the app.
> 
> Each arch has a define for the default SIMD bitwidth value, this is used
> on EAL init to set the config max SIMD bitwidth.
> 
> Cc: Ruifeng Wang <ruifeng.wang@arm.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v4:
>   - Used RTE_SIMD_MAX instead of UINT16_MAX.
>   - Renamed enums to better reflect usage.
>   - Added functions to windows symbol export file.
>   - Modified Doxygen comments.
>   - Modified enum name.
>   - Changed RTE_SIMD_MAX value to a power of 2.
>   - Merged patch 2 into this patch.
>   - Enum now used for default value defines.
>   - Fixed some small comments on v3.
> v3:
>   - Added enum value to essentially disable using max SIMD to choose
>     paths, intended for use by ARM SVE.
>   - Fixed parsing bitwidth argument to return an error for values
>     greater than uint16_t.
>   - Removed unnecessary define in generic rte_vect.h
>   - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
> v2:
>   - Added to Doxygen comment for API.
>   - Changed default bitwidth for Arm to 128.
> ---

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v5 09/17] net/ice: add checks for max SIMD bitwidth
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 09/17] net/ice: " Ciara Power
@ 2020-10-13 12:11     ` Zhang, Qi Z
  0 siblings, 0 replies; 276+ messages in thread
From: Zhang, Qi Z @ 2020-10-13 12:11 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, Richardson, Bruce, Ananyev,
	Konstantin, Yang, Qiming



> -----Original Message-----
> From: Power, Ciara <ciara.power@intel.com>
> Sent: Tuesday, October 13, 2020 7:04 PM
> To: dev@dpdk.org
> Cc: viktorin@rehivetech.com; ruifeng.wang@arm.com; jerinj@marvell.com;
> drc@linux.vnet.ibm.com; Richardson, Bruce <bruce.richardson@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>; Power, Ciara
> <ciara.power@intel.com>; Yang, Qiming <qiming.yang@intel.com>; Zhang, Qi
> Z <qi.z.zhang@intel.com>
> Subject: [PATCH v5 09/17] net/ice: add checks for max SIMD bitwidth
> 
> When choosing a vector path to take, an extra condition must be satisfied to
> ensure the max SIMD bitwidth allows for the CPU enabled path.
> 
> Cc: Qiming Yang <qiming.yang@intel.com>
> Cc: Qi Zhang <qi.z.zhang@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>

Acked-by: Qi Zhang <qi.z.zhang@intel.com>



^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v5 16/17] net: add checks for max SIMD bitwidth
  2020-10-13 11:04   ` [dpdk-dev] [PATCH v5 16/17] net: " Ciara Power
  2020-10-13 11:32     ` Olivier Matz
@ 2020-10-13 13:07     ` Ananyev, Konstantin
  2020-10-13 13:25       ` Ananyev, Konstantin
  1 sibling, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-13 13:07 UTC (permalink / raw)
  To: Power, Ciara, dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, Richardson, Bruce, Singh,
	Jasvinder, Olivier Matz

> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> The vector path was initially chosen in RTE_INIT, however this is no
> longer suitable as we cannot check the max SIMD bitwidth at that time.
> Default handlers are now chosen in RTE_INIT, these default handlers
> are used the first time the crc calc is called, and they set the suitable
> handlers to be used going forward.
> 
> Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> 
> ---
> v4:
>   - Added default handlers to be set at RTE_INIT time, rather than
>     choosing scalar handlers.
>   - Modified logging.
>   - Updated enum name.
> v3:
>   - Moved choosing vector paths out of RTE_INIT.
>   - Moved checking max_simd_bitwidth into the set_alg function.
> ---
>  lib/librte_net/rte_net_crc.c | 75 ++++++++++++++++++++++++++++++------
>  lib/librte_net/rte_net_crc.h |  8 ++++
>  2 files changed, 72 insertions(+), 11 deletions(-)
> 
> diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
> index 4f5b9e8286..11d0161a32 100644
> --- a/lib/librte_net/rte_net_crc.c
> +++ b/lib/librte_net/rte_net_crc.c
> @@ -9,6 +9,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_common.h>
>  #include <rte_net_crc.h>
> +#include <rte_eal.h>
> 
>  #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__)
>  #define X86_64_SSE42_PCLMULQDQ     1
> @@ -32,6 +33,12 @@
>  static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
>  static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
> 
> +static uint32_t
> +rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len);
> +
> +static uint32_t
> +rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len);
> +
>  static uint32_t
>  rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
> 
> @@ -41,7 +48,12 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
>  typedef uint32_t
>  (*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
> 
> -static rte_net_crc_handler *handlers;
> +static rte_net_crc_handler handlers_default[] = {
> +	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_default_handler,
> +	[RTE_NET_CRC32_ETH] = rte_crc32_eth_default_handler,
> +};
> +
> +static rte_net_crc_handler *handlers = handlers_default;
> 
>  static rte_net_crc_handler handlers_scalar[] = {
>  	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
> @@ -60,6 +72,9 @@ static rte_net_crc_handler handlers_neon[] = {
>  };
>  #endif
> 
> +static uint16_t max_simd_bitwidth;
> +RTE_LOG_REGISTER(libnet_logtype, lib.net, INFO);
> +
>  /**
>   * Reflect the bits about the middle
>   *
> @@ -112,6 +127,42 @@ crc32_eth_calc_lut(const uint8_t *data,
>  	return crc;
>  }
> 
> +static uint32_t
> +rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
> +{
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +	handlers = handlers_scalar;
> +#ifdef X86_64_SSE42_PCLMULQDQ
> +	if (max_simd_bitwidth >= RTE_SIMD_128)
> +		handlers = handlers_sse42;
> +#endif
> +#ifdef ARM64_NEON_PMULL
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> +			max_simd_bitwidth >= RTE_SIMD_128) {
> +		handlers = handlers_neon;
> +#endif

You probably don't want to make all these checks for *every* invocation
of that function. I think it would be better:
if (max_simd_bitwidth == 0) {....}
return handlers[..](...);

BTW, while it allows us to use the best possible handler,
such an approach means an extra indirect call (or jump) anyway.
It is hard to say off-hand whether it would affect performance,
and if so, how significantly.
I couldn't find any perf tests for it in our unit tests...

> +	return handlers[RTE_NET_CRC16_CCITT](data, data_len);
> +}
> +
> +static uint32_t
> +rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
> +{
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +	handlers = handlers_scalar;
> +#ifdef X86_64_SSE42_PCLMULQDQ
> +	if (max_simd_bitwidth >= RTE_SIMD_128)
> +		handlers = handlers_sse42;
> +#endif
> +#ifdef ARM64_NEON_PMULL
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> +			max_simd_bitwidth >= RTE_SIMD_128) {
> +		handlers = handlers_neon;
> +#endif
> +	return handlers[RTE_NET_CRC32_ETH](data, data_len);
> +}
> +
>  static void
>  rte_net_crc_scalar_init(void)
>  {
> @@ -145,18 +196,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
>  void
>  rte_net_crc_set_alg(enum rte_net_crc_alg alg)
>  {
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
>  	switch (alg) {
>  #ifdef X86_64_SSE42_PCLMULQDQ
>  	case RTE_NET_CRC_SSE42:
> -		handlers = handlers_sse42;
> -		break;
> +		if (max_simd_bitwidth >= RTE_SIMD_128) {
> +			handlers = handlers_sse42;
> +			return;
> +		}
> +		NET_LOG(INFO, "Max SIMD Bitwidth too low, can't use SSE\n");
>  #elif defined ARM64_NEON_PMULL
>  		/* fall-through */
>  	case RTE_NET_CRC_NEON:
> -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> +				max_simd_bitwidth >= RTE_SIMD_128) {
>  			handlers = handlers_neon;
> -			break;
> +			return;
>  		}
> +		NET_LOG(INFO, "Max SIMD Bitwidth too low or CPU flag not enabled, can't use NEON\n");
>  #endif
>  		/* fall-through */
>  	case RTE_NET_CRC_SCALAR:
> @@ -184,19 +243,13 @@ rte_net_crc_calc(const void *data,
>  /* Select highest available crc algorithm as default one */
>  RTE_INIT(rte_net_crc_init)
>  {
> -	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
> -
>  	rte_net_crc_scalar_init();
> 
>  #ifdef X86_64_SSE42_PCLMULQDQ
> -	alg = RTE_NET_CRC_SSE42;
>  	rte_net_crc_sse42_init();
>  #elif defined ARM64_NEON_PMULL
>  	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> -		alg = RTE_NET_CRC_NEON;
>  		rte_net_crc_neon_init();
>  	}
>  #endif
> -
> -	rte_net_crc_set_alg(alg);
>  }
> diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
> index 16e85ca970..c942865ecf 100644
> --- a/lib/librte_net/rte_net_crc.h
> +++ b/lib/librte_net/rte_net_crc.h
> @@ -7,6 +7,8 @@
> 
>  #include <stdint.h>
> 
> +#include <rte_log.h>
> +
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
> @@ -25,6 +27,12 @@ enum rte_net_crc_alg {
>  	RTE_NET_CRC_NEON,
>  };
> 
> +extern int libnet_logtype;
> +
> +#define NET_LOG(level, fmt, args...)					\
> +	rte_log(RTE_LOG_ ## level, libnet_logtype, "%s(): " fmt "\n",	\
> +		__func__, ## args)
> +
>  /**
>   * This API set the CRC computation algorithm (i.e. scalar version,
>   * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> --
> 2.22.0


^ permalink raw reply	[flat|nested] 276+ messages in thread

* Re: [dpdk-dev] [PATCH v5 16/17] net: add checks for max SIMD bitwidth
  2020-10-13 13:07     ` Ananyev, Konstantin
@ 2020-10-13 13:25       ` Ananyev, Konstantin
  2020-10-13 13:57         ` Ananyev, Konstantin
  0 siblings, 1 reply; 276+ messages in thread
From: Ananyev, Konstantin @ 2020-10-13 13:25 UTC (permalink / raw)
  To: Ananyev, Konstantin, Power, Ciara, dev
  Cc: viktorin, ruifeng.wang, jerinj, drc, Richardson, Bruce, Singh,
	Jasvinder, Olivier Matz


> 
> > When choosing a vector path to take, an extra condition must be
> > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> > path.
> >
> > The vector path was initially chosen in RTE_INIT, however this is no
> > longer suitable as we cannot check the max SIMD bitwidth at that time.
> > Default handlers are now chosen in RTE_INIT, these default handlers
> > are used the first time the crc calc is called, and they set the suitable
> > handlers to be used going forward.
> >
> > Suggested-by: Jasvinder Singh <jasvinder.singh@intel.com>
> > Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> >
> > Signed-off-by: Ciara Power <ciara.power@intel.com>
> >
> > ---
> > v4:
> >   - Added default handlers to be set at RTE_INIT time, rather than
> >     choosing scalar handlers.
> >   - Modified logging.
> >   - Updated enum name.
> > v3:
> >   - Moved choosing vector paths out of RTE_INIT.
> >   - Moved checking max_simd_bitwidth into the set_alg function.
> > ---
> >  lib/librte_net/rte_net_crc.c | 75 ++++++++++++++++++++++++++++++------
> >  lib/librte_net/rte_net_crc.h |  8 ++++
> >  2 files changed, 72 insertions(+), 11 deletions(-)
> >
> > diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
> > index 4f5b9e8286..11d0161a32 100644
> > --- a/lib/librte_net/rte_net_crc.c
> > +++ b/lib/librte_net/rte_net_crc.c
> > @@ -9,6 +9,7 @@
> >  #include <rte_cpuflags.h>
> >  #include <rte_common.h>
> >  #include <rte_net_crc.h>
> > +#include <rte_eal.h>
> >
> >  #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__)
> >  #define X86_64_SSE42_PCLMULQDQ     1
> > @@ -32,6 +33,12 @@
> >  static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
> >  static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
> >
> > +static uint32_t
> > +rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len);
> > +
> > +static uint32_t
> > +rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len);
> > +
> >  static uint32_t
> >  rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
> >
> > @@ -41,7 +48,12 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
> >  typedef uint32_t
> >  (*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
> >
> > -static rte_net_crc_handler *handlers;
> > +static rte_net_crc_handler handlers_default[] = {
> > +	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_default_handler,
> > +	[RTE_NET_CRC32_ETH] = rte_crc32_eth_default_handler,
> > +};
> > +
> > +static rte_net_crc_handler *handlers = handlers_default;
> >
> >  static rte_net_crc_handler handlers_scalar[] = {
> >  	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
> > @@ -60,6 +72,9 @@ static rte_net_crc_handler handlers_neon[] = {
> >  };
> >  #endif
> >
> > +static uint16_t max_simd_bitwidth;
> > +RTE_LOG_REGISTER(libnet_logtype, lib.net, INFO);
> > +
> >  /**
> >   * Reflect the bits about the middle
> >   *
> > @@ -112,6 +127,42 @@ crc32_eth_calc_lut(const uint8_t *data,
> >  	return crc;
> >  }
> >
> > +static uint32_t
> > +rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
> > +{
> > +	if (max_simd_bitwidth == 0)
> > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> > +	handlers = handlers_scalar;
> > +#ifdef X86_64_SSE42_PCLMULQDQ
> > +	if (max_simd_bitwidth >= RTE_SIMD_128)
> > +		handlers = handlers_sse42;
> > +#endif
> > +#ifdef ARM64_NEON_PMULL
> > +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> > +			max_simd_bitwidth >= RTE_SIMD_128) {
> > +		handlers = handlers_neon;
> > +#endif
> 
> You probably don't want to make all these checks for *every* invocation
> of that function. I think it would be better:
> if (max_simd_bitwidth == 0) {....}
> return handlers[..](...);

As another thought - it is probably a bit safer to update max_simd_bitwidth
after the handlers value has been updated.

handlers = ...; rte_smp_wmb(); max_simd_bitwidth = ...;

> 
> BTW, while it allows us to use best possible handler,
> such approach means extra indirect call(/jump) anyway.
> Hard to say off-hand would it affect performance,
> and if yes how significantly.
> Couldn't find any perf tests in our UT for it...
> 
> > +	return handlers[RTE_NET_CRC16_CCITT](data, data_len);
> > +}
> > +
> > +static uint32_t
> > +rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
> > +{
> > +	if (max_simd_bitwidth == 0)
> > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> > +	handlers = handlers_scalar;
> > +#ifdef X86_64_SSE42_PCLMULQDQ
> > +	if (max_simd_bitwidth >= RTE_SIMD_128)
> > +		handlers = handlers_sse42;
> > +#endif
> > +#ifdef ARM64_NEON_PMULL
> > +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> > +			max_simd_bitwidth >= RTE_SIMD_128) {
> > +		handlers = handlers_neon;
> > +#endif
> > +	return handlers[RTE_NET_CRC32_ETH](data, data_len);
> > +}
> > +
> >  static void
> >  rte_net_crc_scalar_init(void)
> >  {
> > @@ -145,18 +196,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
> >  void
> >  rte_net_crc_set_alg(enum rte_net_crc_alg alg)
> >  {
> > +	if (max_simd_bitwidth == 0)
> > +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> > +
> >  	switch (alg) {
> >  #ifdef X86_64_SSE42_PCLMULQDQ
> >  	case RTE_NET_CRC_SSE42:
> > -		handlers = handlers_sse42;
> > -		break;
> > +		if (max_simd_bitwidth >= RTE_SIMD_128) {
> > +			handlers = handlers_sse42;
> > +			return;
> > +		}
> > +		NET_LOG(INFO, "Max SIMD Bitwidth too low, can't use SSE\n");
> >  #elif defined ARM64_NEON_PMULL
> >  		/* fall-through */
> >  	case RTE_NET_CRC_NEON:
> > -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> > +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> > +				max_simd_bitwidth >= RTE_SIMD_128) {
> >  			handlers = handlers_neon;
> > -			break;
> > +			return;
> >  		}
> > +		NET_LOG(INFO, "Max SIMD Bitwidth too low or CPU flag not enabled, can't use NEON\n");
> >  #endif
> >  		/* fall-through */
> >  	case RTE_NET_CRC_SCALAR:
> > @@ -184,19 +243,13 @@ rte_net_crc_calc(const void *data,
> >  /* Select highest available crc algorithm as default one */
> >  RTE_INIT(rte_net_crc_init)
> >