Remove RTE_MACHINE_CPUFLAG_ macros from the build. Deprecation notice sent, pasted here for reference: build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be removed from the build. The information provided by these macros is available through standard compiler macros. For example, RTE_MACHINE_CPUFLAG_SSE3 duplicates the compiler-provided macro __SSE3__. Radu Nicolau (4): x86: change cpuflag macros to compiler macros arm: change cpuflag macros to compiler macros ppc: change cpuflag macros to compiler macros doc: remove reference to RTE_MACHINE_CPUFLAG app/test-pmd/macswap.c | 2 +- app/test/test_memcpy_perf.c | 8 ++++---- config/arm/meson.build | 6 ------ config/ppc/meson.build | 2 -- config/x86/meson.build | 2 -- doc/guides/prog_guide/writing_efficient_code.rst | 2 +- drivers/net/enic/Makefile | 2 +- drivers/net/enic/meson.build | 2 +- drivers/net/i40e/Makefile | 2 +- drivers/net/i40e/meson.build | 2 +- drivers/net/iavf/Makefile | 2 +- drivers/net/iavf/meson.build | 2 +- drivers/net/ice/Makefile | 2 +- drivers/net/ice/meson.build | 2 +- drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 16 ++++++++-------- examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_acl/Makefile | 2 +- lib/librte_acl/meson.build | 2 +- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_eal/common/rte_random.c | 4 ++-- lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- lib/librte_efd/rte_efd_x86.h | 2 +- lib/librte_hash/Makefile | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 4 ++-- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_member/rte_member_ht.c | 10 +++++----- lib/librte_member/rte_member_x86.h | 2 +- lib/librte_net/rte_net_crc.c | 4 ++-- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- mk/rte.cpuflags.mk | 1 - 39 files changed, 58 insertions(+), 69 deletions(-) -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- app/test/test_memcpy_perf.c | 8 ++++---- config/x86/meson.build | 2 -- drivers/net/enic/Makefile | 2 +- drivers/net/enic/meson.build | 2 +- drivers/net/i40e/Makefile | 2 +- drivers/net/i40e/meson.build | 2 +- drivers/net/iavf/Makefile | 2 +- drivers/net/iavf/meson.build | 2 +- drivers/net/ice/Makefile | 2 +- drivers/net/ice/meson.build | 2 +- examples/l3fwd/l3fwd_em.c | 4 ++-- lib/librte_acl/Makefile | 2 +- lib/librte_acl/meson.build | 2 +- lib/librte_eal/common/rte_random.c | 4 ++-- lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- lib/librte_efd/rte_efd_x86.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_member/rte_member_ht.c | 10 +++++----- lib/librte_member/rte_member_x86.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- mk/rte.cpuflags.mk | 1 - 21 files changed, 31 insertions(+), 34 deletions(-) diff --git a/app/test/test_memcpy_perf.c b/app/test/test_memcpy_perf.c index 00a2092b4..c711e36ba 100644 --- a/app/test/test_memcpy_perf.c +++ b/app/test/test_memcpy_perf.c @@ -51,13 +51,13 @@ static size_t buf_sizes[TEST_VALUE_RANGE]; #define TEST_BATCH_SIZE 100 /* Data is aligned on this many bytes (power of 2) */ -#ifdef RTE_MACHINE_CPUFLAG_AVX512F +#ifdef __AVX512F__ #define ALIGNMENT_UNIT 64 -#elif defined RTE_MACHINE_CPUFLAG_AVX2 +#elif defined __AVX2__ #define ALIGNMENT_UNIT 32 -#else /* RTE_MACHINE_CPUFLAG */ +#else #define ALIGNMENT_UNIT 16 -#endif /* RTE_MACHINE_CPUFLAG */ +#endif /* * Pointers used in performance tests. The two large buffers are for uncached diff --git a/config/x86/meson.build b/config/x86/meson.build index 6ec020ef6..fea4d5403 100644 --- a/config/x86/meson.build +++ b/config/x86/meson.build @@ -18,7 +18,6 @@ endif base_flags = ['SSE', 'SSE2', 'SSE3','SSSE3', 'SSE4_1', 'SSE4_2'] foreach f:base_flags - dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1) compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endforeach @@ -32,7 +31,6 @@ foreach f:optional_flags elif f == 'RDRND' f = 'RDRAND' endif - dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1) compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endif endforeach diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile index d098a474a..a6055983c 100644 --- a/drivers/net/enic/Makefile +++ b/drivers/net/enic/Makefile @@ -45,7 +45,7 @@ ifeq ($(CONFIG_RTE_ARCH_X86_64),y) # 'default' machine (corei7 which has no avx2) and run the binary on # newer CPUs that have avx2. # This part is verbatim from i40e makefile. -ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) +ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),) CC_AVX2_SUPPORT=1 else CC_AVX2_SUPPORT=\ diff --git a/drivers/net/enic/meson.build b/drivers/net/enic/meson.build index 1bd7cc7e1..896b224e4 100644 --- a/drivers/net/enic/meson.build +++ b/drivers/net/enic/meson.build @@ -19,7 +19,7 @@ deps += ['hash'] includes += include_directories('base') # The current implementation assumes 64-bit pointers -if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and dpdk_conf.get('RTE_ARCH_64') +if cc.get_define('__AVX2__', args: machine_args) != '' and dpdk_conf.get('RTE_ARCH_64') sources += files('enic_rxtx_vec_avx2.c') # Build the avx2 handler if the compiler supports it, even though 'machine' # does not. This is to support users who build for the min supported machine diff --git a/drivers/net/i40e/Makefile b/drivers/net/i40e/Makefile index 43f10941b..bb486b83c 100644 --- a/drivers/net/i40e/Makefile +++ b/drivers/net/i40e/Makefile @@ -85,7 +85,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e_tm.c SRCS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e_vf_representor.c ifeq ($(CONFIG_RTE_LIBRTE_I40E_INC_VECTOR),y) -ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) +ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),) CC_AVX2_SUPPORT=1 else CC_AVX2_SUPPORT=\ diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index 211d45d88..68f9895cd 100644 --- a/drivers/net/i40e/meson.build +++ b/drivers/net/i40e/meson.build @@ -31,7 +31,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('i40e_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') diff --git a/drivers/net/iavf/Makefile b/drivers/net/iavf/Makefile index 792cbb7f7..61eca6271 100644 --- a/drivers/net/iavf/Makefile +++ b/drivers/net/iavf/Makefile @@ -31,7 +31,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_IAVF_PMD) += iavf_rxtx_vec_sse.c endif ifeq ($(CONFIG_RTE_LIBRTE_IAVF_PMD), y) - ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) + ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),) CC_AVX2_SUPPORT=1 else CC_AVX2_SUPPORT=\ diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build index a3fad363d..33407c503 100644 --- a/drivers/net/iavf/meson.build +++ b/drivers/net/iavf/meson.build @@ -21,7 +21,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('iavf_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile index 34cd4024b..6beb7ba40 100644 --- a/drivers/net/ice/Makefile +++ b/drivers/net/ice/Makefile @@ -66,7 +66,7 @@ endif SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_switch_filter.c SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_fdir_filter.c SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_hash.c -ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) +ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),) CC_AVX2_SUPPORT=1 else CC_AVX2_SUPPORT=\ diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index e6fe74487..99e1b773a 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -22,7 +22,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' sources += files('ice_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') ice_avx2_lib = static_library('ice_avx2_lib', diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index fdbee70b4..78181a640 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -215,7 +215,7 @@ static rte_xmm_t mask0; static rte_xmm_t mask1; static rte_xmm_t mask2; -#if defined(RTE_MACHINE_CPUFLAG_SSE2) +#if defined(__SSE2__) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask) return vandq_s32(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC) +#elif defined(__ALTIVEC__) static inline xmm_t em_mask_key(void *key, xmm_t mask) { diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile index f4332b044..3b591c2ed 100644 --- a/lib/librte_acl/Makefile +++ b/lib/librte_acl/Makefile @@ -38,7 +38,7 @@ endif # #check if flag for AVX2 is already on, if not set it up manually -ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) +ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),) CC_AVX2_SUPPORT=1 else CC_AVX2_SUPPORT=\ diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index d1e2c184c..b31a3f798 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -15,7 +15,7 @@ if dpdk_conf.has('RTE_ARCH_X86') # in former case, just add avx2 C file to files list # in latter case, compile c file to static lib, using correct compiler # flags, and then have the .o file from static lib linked into main lib. - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' sources += files('acl_run_avx2.c') cflags += '-DCC_AVX2_SUPPORT' elif cc.has_argument('-mavx2') diff --git a/lib/librte_eal/common/rte_random.c b/lib/librte_eal/common/rte_random.c index b7a089ac4..b2c5416b3 100644 --- a/lib/librte_eal/common/rte_random.c +++ b/lib/librte_eal/common/rte_random.c @@ -2,7 +2,7 @@ * Copyright(c) 2019 Ericsson AB */ -#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#ifdef __RDSEED__ #include <x86intrin.h> #endif #include <stdlib.h> @@ -188,7 +188,7 @@ __rte_random_initial_seed(void) if (ge_rc == 0) return ge_seed; #endif -#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#ifdef __RDSEED__ unsigned int rdseed_low; unsigned int rdseed_high; diff --git a/lib/librte_eal/x86/include/rte_memcpy.h b/lib/librte_eal/x86/include/rte_memcpy.h index 9c67232df..008a3de67 100644 --- a/lib/librte_eal/x86/include/rte_memcpy.h +++ b/lib/librte_eal/x86/include/rte_memcpy.h @@ -45,7 +45,7 @@ extern "C" { static __rte_always_inline void * rte_memcpy(void *dst, const void *src, size_t n); -#ifdef RTE_MACHINE_CPUFLAG_AVX512F +#ifdef __AVX512F__ #define ALIGNMENT_MASK 0x3F @@ -286,7 +286,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_128_BACK63; } -#elif defined RTE_MACHINE_CPUFLAG_AVX2 +#elif defined __AVX2__ #define ALIGNMENT_MASK 0x1F @@ -479,7 +479,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_128_BACK31; } -#else /* RTE_MACHINE_CPUFLAG */ +#else /* __AVX512F__ */ #define ALIGNMENT_MASK 0x0F @@ -803,7 +803,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_64_BACK15; } -#endif /* RTE_MACHINE_CPUFLAG */ +#endif /* __AVX512F__ */ static __rte_always_inline void * rte_memcpy_aligned(void *dst, const void *src, size_t n) diff --git a/lib/librte_efd/rte_efd_x86.h b/lib/librte_efd/rte_efd_x86.h index 6c207e87d..e2f9dcca8 100644 --- a/lib/librte_efd/rte_efd_x86.h +++ b/lib/librte_efd/rte_efd_x86.h @@ -19,7 +19,7 @@ efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx, const efd_lookuptbl_t *group_lookup_table, const uint32_t hash_val_a, const uint32_t hash_val_b) { -#ifdef RTE_MACHINE_CPUFLAG_AVX2 +#ifdef __AVX2__ efd_value_t value = 0; uint32_t i = 0; __m256i vhash_val_a = _mm256_set1_epi32(hash_val_a); diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 0a6d47471..7c7ab84af 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1691,7 +1691,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, /* For match mask the first bit of every two bits indicates the match */ switch (sig_cmp_fn) { -#if defined(RTE_MACHINE_CPUFLAG_SSE2) +#if defined(__SSE2__) case RTE_HASH_COMPARE_SSE: /* Compare all signatures in the bucket */ *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c index cbcd0d440..3ea293a09 100644 --- a/lib/librte_member/rte_member_ht.c +++ b/lib/librte_member/rte_member_ht.c @@ -176,7 +176,7 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, set_id) || @@ -216,7 +216,7 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, for (i = 0; i < num_keys; i++) { switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (search_bucket_single_avx(prim_buckets[i], tmp_sig[i], buckets, &set_id[i]) || @@ -253,7 +253,7 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, &num_matches, match_per_key, set_id); @@ -296,7 +296,7 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, match_cnt_tmp = 0; switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], buckets, &match_cnt_tmp, match_per_key, @@ -357,7 +357,7 @@ try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, enum rte_member_sig_compare_function cmp_fn) { switch (cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (update_entry_search_avx(prim, sig, buckets, set_id) || update_entry_search_avx(sec, sig, buckets, diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h index 21a498ef0..74c8e3885 100644 --- a/lib/librte_member/rte_member_x86.h +++ b/lib/librte_member/rte_member_x86.h @@ -11,7 +11,7 @@ extern "C" { #include <x86intrin.h> -#if defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(__AVX2__) static inline int update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig, diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 9fd4794a9..56a0ed129 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -10,7 +10,7 @@ #include <rte_common.h> #include <rte_net_crc.h> -#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) +#if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 #elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) #define ARM64_NEON_PMULL 1 diff --git a/mk/rte.cpuflags.mk b/mk/rte.cpuflags.mk index fa8753531..e517524b6 100644 --- a/mk/rte.cpuflags.mk +++ b/mk/rte.cpuflags.mk @@ -113,7 +113,6 @@ CPUFLAGS += SHA1 CPUFLAGS += SHA2 endif -MACHINE_CFLAGS += $(addprefix -DRTE_MACHINE_CPUFLAG_,$(CPUFLAGS)) # To strip whitespace comma:= , -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- app/test-pmd/macswap.c | 2 +- config/arm/meson.build | 6 ------ drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 12 ++++++------ examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_hash/Makefile | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- 19 files changed, 26 insertions(+), 32 deletions(-) diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index fbe8cb39e..c84e65000 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -39,7 +39,7 @@ #include "testpmd.h" #if defined(RTE_ARCH_X86) #include "macswap_sse.h" -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM__NEON) #include "macswap_neon.h" #else #include "macswap.h" diff --git a/config/arm/meson.build b/config/arm/meson.build index 8728051d5..42c0c34a5 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -208,20 +208,14 @@ message(machine_args) if (cc.get_define('__ARM_NEON', args: machine_args) != '' or cc.get_define('__aarch64__', args: machine_args) != '') - dpdk_conf.set('RTE_MACHINE_CPUFLAG_NEON', 1) compile_time_cpuflags += ['RTE_CPUFLAG_NEON'] endif if cc.get_define('__ARM_FEATURE_CRC32', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_CRC32', 1) compile_time_cpuflags += ['RTE_CPUFLAG_CRC32'] endif if cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_AES', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_PMULL', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA1', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA2', 1) compile_time_cpuflags += ['RTE_CPUFLAG_AES', 'RTE_CPUFLAG_PMULL', 'RTE_CPUFLAG_SHA1', 'RTE_CPUFLAG_SHA2'] endif diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index fd0cb9b0e..f70012684 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -3960,7 +3960,7 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc) return ptypes; -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) if (dev->rx_pkt_burst == ixgbe_recv_pkts_vec || dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec) return ptypes; diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 67055431f..2cf06099e 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -12,7 +12,7 @@ #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 -#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON) +#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON) #define NO_HASH_MULTI_LOOKUP 1 #endif diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 78181a640..c529dcd3e 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -28,7 +28,7 @@ #include "l3fwd.h" #include "l3fwd_event.h" -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #define EM_HASH_CRC 1 #endif @@ -223,7 +223,7 @@ em_mask_key(void *key, xmm_t mask) return _mm_and_si128(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -303,7 +303,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, void *lookup_struct) return (ret < 0) ? portid : ipv6_l3fwd_out_if[ret]; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON #if defined(NO_HASH_MULTI_LOOKUP) #include "l3fwd_em_sequential.h" #else @@ -685,7 +685,7 @@ em_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_send_packets(nb_rx, pkts_burst, portid, qconf); #else @@ -723,7 +723,7 @@ em_event_loop_single(struct l3fwd_event_resources *evt_rsrc, struct rte_mbuf *mbuf = ev.mbuf; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON mbuf->port = em_get_dst_port(lconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); #else @@ -784,7 +784,7 @@ em_event_loop_burst(struct l3fwd_event_resources *evt_rsrc, continue; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_process_events(nb_deq, (struct rte_event **)&events, lconf); #else diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 79812716c..278707c18 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -9,7 +9,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" #include "l3fwd_em_hlm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #include "l3fwd_em_hlm_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index b231b9994..6170052cf 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -16,7 +16,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 91eb74272..3dcf1fef1 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -163,7 +163,7 @@ lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, #if defined(RTE_ARCH_X86) #include "l3fwd_lpm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_lpm_neon.h" #elif defined(RTE_ARCH_PPC_64) #include "l3fwd_lpm_altivec.h" @@ -240,7 +240,7 @@ lpm_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 l3fwd_lpm_send_packets(nb_rx, pkts_burst, portid, qconf); @@ -259,7 +259,7 @@ lpm_process_event_pkt(const struct lcore_conf *lconf, struct rte_mbuf *mbuf) { mbuf->port = lpm_get_dst_port(lconf, mbuf, mbuf->port); -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 process_packet(mbuf, &mbuf->port); #else diff --git a/lib/librte_eal/arm/include/rte_memcpy_32.h b/lib/librte_eal/arm/include/rte_memcpy_32.h index eb02c3b41..fb3245b59 100644 --- a/lib/librte_eal/arm/include/rte_memcpy_32.h +++ b/lib/librte_eal/arm/include/rte_memcpy_32.h @@ -16,7 +16,7 @@ extern "C" { #ifdef RTE_ARCH_ARM_NEON_MEMCPY -#ifndef RTE_MACHINE_CPUFLAG_NEON +#ifndef __ARM_NEON #error "Cannot optimize memcpy by NEON as the CPU seems to not support this" #endif diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index ec9f86499..b84a40d55 100644 --- a/lib/librte_hash/Makefile +++ b/lib/librte_hash/Makefile @@ -20,7 +20,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h ifeq ($(CONFIG_RTE_ARCH_ARM64),y) -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),) +ifneq ($(filter $(AUTO_CPUFLAGS),__ARM_FEATURE_CRC32),) SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h endif endif diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 7c7ab84af..aad0c965b 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1704,7 +1704,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, (__m128i const *)sec_bkt->sig_current), _mm_set1_epi16(sig))); break; -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) case RTE_HASH_COMPARE_NEON: { uint16x8_t vmat, vsig, x; int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1}; diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index cf28031b3..3e131aa6b 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -424,7 +424,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) static uint8_t crc32_alg = CRC32_SW; -#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_crc_arm64.h" #else diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 51b512946..061efa2ae 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -28,7 +28,7 @@ extern "C" { #include <rte_ip.h> #include <rte_common.h> -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) #include <rte_vect.h> #endif @@ -149,7 +149,7 @@ rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig, ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); *(__m128i *)targ->v6.dst_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr); vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6)); ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr); diff --git a/lib/librte_member/rte_member.h b/lib/librte_member/rte_member.h index ab2b23217..c0689e233 100644 --- a/lib/librte_member/rte_member.h +++ b/lib/librte_member/rte_member.h @@ -68,7 +68,7 @@ typedef uint16_t member_set_t; #define RTE_MEMBER_NAMESIZE 32 /** @internal Hash function used by membership library. */ -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #include <rte_hash_crc.h> #define MEMBER_HASH_FUNC rte_hash_crc #else diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 56a0ed129..4f5b9e828 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -12,7 +12,7 @@ #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRYPTO) #define ARM64_NEON_PMULL 1 #endif diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c index 8e6379457..293c77f39 100644 --- a/lib/librte_node/ip4_lookup.c +++ b/lib/librte_node/ip4_lookup.c @@ -30,7 +30,7 @@ struct ip4_lookup_node_main { static struct ip4_lookup_node_main ip4_lookup_nm; -#if defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(__ARM_NEON) #include "ip4_lookup_neon.h" #elif defined(RTE_ARCH_X86) #include "ip4_lookup_sse.h" diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 0fa074166..75be8b6bd 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -29,7 +29,7 @@ #ifdef RTE_ARCH_X86 #define SCHED_VECTOR_SSE4 -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) #define SCHED_VECTOR_NEON #endif diff --git a/lib/librte_table/rte_lru_arm64.h b/lib/librte_table/rte_lru_arm64.h index b45e9d03c..add889a57 100644 --- a/lib/librte_table/rte_lru_arm64.h +++ b/lib/librte_table/rte_lru_arm64.h @@ -13,7 +13,7 @@ extern "C" { #include <rte_vect.h> #ifndef RTE_TABLE_HASH_LRU_STRATEGY -#ifdef RTE_MACHINE_CPUFLAG_NEON +#ifdef __ARM_NEON #define RTE_TABLE_HASH_LRU_STRATEGY 3 #else /* if no NEON, use simple scalar version */ #define RTE_TABLE_HASH_LRU_STRATEGY 1 diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h index 350c79564..c4c35cc06 100644 --- a/lib/librte_table/rte_table_hash_func.h +++ b/lib/librte_table/rte_table_hash_func.h @@ -41,7 +41,7 @@ rte_crc32_u64(uint64_t crc, uint64_t v) return _mm_crc32_u64(crc, v); } -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_table_hash_func_arm64.h" #else -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- config/ppc/meson.build | 2 -- 1 file changed, 2 deletions(-) diff --git a/config/ppc/meson.build b/config/ppc/meson.build index aa7d73d11..0d8da87e6 100644 --- a/config/ppc/meson.build +++ b/config/ppc/meson.build @@ -21,5 +21,3 @@ endif dpdk_conf.set('RTE_MAX_LCORE', 1536) dpdk_conf.set('RTE_MAX_NUMA_NODES', 32) dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128) -dpdk_conf.set('RTE_MACHINE_CPUFLAG_ALTIVEC', 1) -dpdk_conf.set('RTE_MACHINE_CPUFLAG_VSX', 1) -- 2.17.1
RTE_MACHINE_CPUFLAG macros are replaced with predefined compiler defines. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- doc/guides/prog_guide/writing_efficient_code.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/guides/prog_guide/writing_efficient_code.rst b/doc/guides/prog_guide/writing_efficient_code.rst index 2639ef7bf..c1f2d627a 100644 --- a/doc/guides/prog_guide/writing_efficient_code.rst +++ b/doc/guides/prog_guide/writing_efficient_code.rst @@ -273,5 +273,5 @@ main() function and checks if the current machine is suitable for running the bi Along with compiler optimizations, a set of preprocessor defines are automatically added to the build process (regardless of the compiler version). These defines correspond to the instruction sets that the target CPU should be able to support. -For example, a binary compiled for any SSE4.2-capable processor will have RTE_MACHINE_CPUFLAG_SSE4_2 defined, +For example, a binary compiled for any SSE4.2-capable processor will have a pre-defined compiler macro, thus enabling compile-time code path selection for different platforms. -- 2.17.1
On Wed, Sep 02, 2020 at 10:43:40AM +0000, Radu Nicolau wrote: > Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler > macros. > I think it's worth noting in the commit log that the set of macros provided by the compilers are more complete than those provided by DPDK, and by not having our own it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. > Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> > Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> > --- > app/test/test_memcpy_perf.c | 8 ++++---- > config/x86/meson.build | 2 -- > drivers/net/enic/Makefile | 2 +- > drivers/net/enic/meson.build | 2 +- > drivers/net/i40e/Makefile | 2 +- > drivers/net/i40e/meson.build | 2 +- > drivers/net/iavf/Makefile | 2 +- > drivers/net/iavf/meson.build | 2 +- > drivers/net/ice/Makefile | 2 +- > drivers/net/ice/meson.build | 2 +- > examples/l3fwd/l3fwd_em.c | 4 ++-- > lib/librte_acl/Makefile | 2 +- > lib/librte_acl/meson.build | 2 +- > lib/librte_eal/common/rte_random.c | 4 ++-- > lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- > lib/librte_efd/rte_efd_x86.h | 2 +- > lib/librte_hash/rte_cuckoo_hash.c | 2 +- > lib/librte_member/rte_member_ht.c | 10 +++++----- > lib/librte_member/rte_member_x86.h | 2 +- > lib/librte_net/rte_net_crc.c | 2 +- > mk/rte.cpuflags.mk | 1 - > 21 files changed, 31 insertions(+), 34 deletions(-) > <snip> > @@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask) > > return vandq_s32(data, mask); > } > -#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC) > +#elif defined(__ALTIVEC__) Not an x86 flag. Belongs in patch 3. > static inline xmm_t > em_mask_key(void *key, xmm_t mask) > { > diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile > index f4332b044..3b591c2ed 100644 > --- a/lib/librte_acl/Makefile > +++ b/lib/librte_acl/Makefile > @@ -38,7 +38,7 @@ endif > # > > #check if flag for AVX2 is already on, if not set it up manually > -ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) > +ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),) > CC_AVX2_SUPPORT=1 > else > CC_AVX2_SUPPORT=\ > diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build > index d1e2c184c..b31a3f798 100644 > --- a/lib/librte_acl/meson.build > +++ b/lib/librte_acl/meson.build > @@ -15,7 +15,7 @@ if dpdk_conf.has('RTE_ARCH_X86') > # in former case, just add avx2 C file to files list > # in latter case, compile c file to static lib, using correct compiler > # flags, and then have the .o file from static lib linked into main lib. > - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') > + if cc.get_define('__AVX2__', args: machine_args) != '' Since this is used in a number of places, we probably should just get the result in a variable in config/x86/meson.build. > sources += files('acl_run_avx2.c') > cflags += '-DCC_AVX2_SUPPORT' > elif cc.has_argument('-mavx2') <snip>
On Wed, Sep 02, 2020 at 10:43:43AM +0000, Radu Nicolau wrote:
> RTE_MACHINE_CPUFLAG macros are replaced with predefined
> compiler defines.
>
> Signed-off-by: Sean Morrissey <sean.morrissey@intel.com>
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> ---
> doc/guides/prog_guide/writing_efficient_code.rst | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/doc/guides/prog_guide/writing_efficient_code.rst b/doc/guides/prog_guide/writing_efficient_code.rst
> index 2639ef7bf..c1f2d627a 100644
> --- a/doc/guides/prog_guide/writing_efficient_code.rst
> +++ b/doc/guides/prog_guide/writing_efficient_code.rst
> @@ -273,5 +273,5 @@ main() function and checks if the current machine is suitable for running the bi
> Along with compiler optimizations,
> a set of preprocessor defines are automatically added to the build process (regardless of the compiler version).
> These defines correspond to the instruction sets that the target CPU should be able to support.
> -For example, a binary compiled for any SSE4.2-capable processor will have RTE_MACHINE_CPUFLAG_SSE4_2 defined,
> +For example, a binary compiled for any SSE4.2-capable processor will have a pre-defined compiler macro,
> thus enabling compile-time code path selection for different platforms.
> --
Personally, I'd suggest just removing the whole section rather than part of
a single line.
On 9/2/20 3:43 AM, Radu Nicolau wrote:
> Remove RTE_MACHINE_CPUFLAG_ macros from the build.
> Deprecation notice sent, pasted here for reference:
>
> build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be removed
> from the build. The information provided by these macros is available
> through standard compiler macros. For example, RTE_MACHINE_CPUFLAG_SSE3
> duplicates the compiler-provided macro __SSE3__.
>
> Radu Nicolau (4):
> x86: change cpuflag macros to compiler macros
> arm: change cpuflag macros to compiler macros
> ppc: change cpuflag macros to compiler macros
> doc: remove reference to RTE_MACHINE_CPUFLAG
I'm not too familiar with clang and icc. Do all compilers use the same
macro definitions for the same CPU features? I would have thought the
RTE_* definitions were there because there are compiler or compiler
version differences that need to be supported.
Dave
On 9/2/2020 5:57 PM, David Christensen wrote:
> On 9/2/20 3:43 AM, Radu Nicolau wrote:
>> Remove RTE_MACHINE_CPUFLAG_ macros from the build.
>> Deprecation notice sent, pasted here for reference:
>>
>> build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be
>> removed
>> from the build. The information provided by these macros is available
>> through standard compiler macros. For example,
>> RTE_MACHINE_CPUFLAG_SSE3
>> duplicates the compiler-provided macro __SSE3__.
>>
>> Radu Nicolau (4):
>> x86: change cpuflag macros to compiler macros
>> arm: change cpuflag macros to compiler macros
>> ppc: change cpuflag macros to compiler macros
>> doc: remove reference to RTE_MACHINE_CPUFLAG
>
> I'm not too familiar with clang and icc. Do all compilers use the
> same macro definitions for the same CPU features? I would have
> thought the RTE_* definitions were there because there are compiler or
> compiler version differences that need to be supported.
>
> Dave
All supported compilers should have these defines, and the
RTE_MACHINE_CPU macros were defined based on the compiler defined
macros, as an unnecessary extra step.
Furthermore, as per Bruce's comment, new features will be easily
implemented directly in the relevant sections without needing to update
the build system.
Hello Radu,
On Wed, Sep 2, 2020 at 12:43 PM Radu Nicolau <radu.nicolau@intel.com> wrote:
>
> Remove RTE_MACHINE_CPUFLAG_ macros from the build.
> Deprecation notice sent, pasted here for reference:
>
> build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be removed
> from the build. The information provided by these macros is available
> through standard compiler macros. For example, RTE_MACHINE_CPUFLAG_SSE3
> duplicates the compiler-provided macro __SSE3__.
>
> Radu Nicolau (4):
> x86: change cpuflag macros to compiler macros
> arm: change cpuflag macros to compiler macros
> ppc: change cpuflag macros to compiler macros
> doc: remove reference to RTE_MACHINE_CPUFLAG
Expecting a v2 to address Bruce comment.
Thanks.
--
David Marchand
Remove RTE_MACHINE_CPUFLAG_ macros from the build. Deprecation notice sent, pasted here for reference: * build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be removed from the build. The information provided by these macros is available through standard compiler macros. For example, RTE_MACHINE_CPUFLAG_SSE3 duplicates the compiler-provided macro __SSE3__. Radu Nicolau (4): x86: change cpuflag macros to compiler macros arm: change cpuflag macros to compiler macros ppc: change cpuflag macros to compiler macros doc: remove reference to RTE_MACHINE_CPUFLAG app/test-pmd/macswap.c | 2 +- app/test/test_memcpy_perf.c | 8 ++++---- config/arm/meson.build | 6 ------ config/ppc/meson.build | 2 -- config/x86/meson.build | 2 -- doc/guides/prog_guide/writing_efficient_code.rst | 2 -- doc/guides/rel_notes/release_20_11.rst | 1 + drivers/net/enic/meson.build | 2 +- drivers/net/i40e/meson.build | 2 +- drivers/net/iavf/meson.build | 2 +- drivers/net/ice/meson.build | 2 +- drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 16 ++++++++-------- examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_acl/meson.build | 2 +- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_eal/common/rte_random.c | 4 ++-- lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- lib/librte_efd/rte_efd_x86.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 4 ++-- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_member/rte_member_ht.c | 10 +++++----- lib/librte_member/rte_member_x86.h | 2 +- lib/librte_net/rte_net_crc.c | 4 ++-- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- 33 files changed, 52 insertions(+), 63 deletions(-) -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- app/test/test_memcpy_perf.c | 8 ++++---- config/x86/meson.build | 2 -- drivers/net/enic/meson.build | 2 +- drivers/net/i40e/meson.build | 2 +- drivers/net/iavf/meson.build | 2 +- drivers/net/ice/meson.build | 2 +- examples/l3fwd/l3fwd_em.c | 4 ++-- lib/librte_acl/meson.build | 2 +- lib/librte_eal/common/rte_random.c | 4 ++-- lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- lib/librte_efd/rte_efd_x86.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_member/rte_member_ht.c | 10 +++++----- lib/librte_member/rte_member_x86.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- 15 files changed, 26 insertions(+), 28 deletions(-) diff --git a/app/test/test_memcpy_perf.c b/app/test/test_memcpy_perf.c index 00a2092b4..c711e36ba 100644 --- a/app/test/test_memcpy_perf.c +++ b/app/test/test_memcpy_perf.c @@ -51,13 +51,13 @@ static size_t buf_sizes[TEST_VALUE_RANGE]; #define TEST_BATCH_SIZE 100 /* Data is aligned on this many bytes (power of 2) */ -#ifdef RTE_MACHINE_CPUFLAG_AVX512F +#ifdef __AVX512F__ #define ALIGNMENT_UNIT 64 -#elif defined RTE_MACHINE_CPUFLAG_AVX2 +#elif defined __AVX2__ #define ALIGNMENT_UNIT 32 -#else /* RTE_MACHINE_CPUFLAG */ +#else #define ALIGNMENT_UNIT 16 -#endif /* RTE_MACHINE_CPUFLAG */ +#endif /* * Pointers used in performance tests. The two large buffers are for uncached diff --git a/config/x86/meson.build b/config/x86/meson.build index 6ec020ef6..fea4d5403 100644 --- a/config/x86/meson.build +++ b/config/x86/meson.build @@ -18,7 +18,6 @@ endif base_flags = ['SSE', 'SSE2', 'SSE3','SSSE3', 'SSE4_1', 'SSE4_2'] foreach f:base_flags - dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1) compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endforeach @@ -32,7 +31,6 @@ foreach f:optional_flags elif f == 'RDRND' f = 'RDRAND' endif - dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1) compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endif endforeach diff --git a/drivers/net/enic/meson.build b/drivers/net/enic/meson.build index 7f4836d0f..86ef2a8a2 100644 --- a/drivers/net/enic/meson.build +++ b/drivers/net/enic/meson.build @@ -20,7 +20,7 @@ deps += ['hash'] includes += include_directories('base') # The current implementation assumes 64-bit pointers -if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and dpdk_conf.get('RTE_ARCH_64') +if cc.get_define('__AVX2__', args: machine_args) != '' and dpdk_conf.get('RTE_ARCH_64') sources += files('enic_rxtx_vec_avx2.c') # Build the avx2 handler if the compiler supports it, even though 'machine' # does not. This is to support users who build for the min supported machine diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index 211d45d88..68f9895cd 100644 --- a/drivers/net/i40e/meson.build +++ b/drivers/net/i40e/meson.build @@ -31,7 +31,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('i40e_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build index a3fad363d..33407c503 100644 --- a/drivers/net/iavf/meson.build +++ b/drivers/net/iavf/meson.build @@ -21,7 +21,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('iavf_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index e6fe74487..99e1b773a 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -22,7 +22,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' sources += files('ice_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') ice_avx2_lib = static_library('ice_avx2_lib', diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index fdbee70b4..78181a640 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -215,7 +215,7 @@ static rte_xmm_t mask0; static rte_xmm_t mask1; static rte_xmm_t mask2; -#if defined(RTE_MACHINE_CPUFLAG_SSE2) +#if defined(__SSE2__) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask) return vandq_s32(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC) +#elif defined(__ALTIVEC__) static inline xmm_t em_mask_key(void *key, xmm_t mask) { diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index d1e2c184c..b31a3f798 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -15,7 +15,7 @@ if dpdk_conf.has('RTE_ARCH_X86') # in former case, just add avx2 C file to files list # in latter case, compile c file to static lib, using correct compiler # flags, and then have the .o file from static lib linked into main lib. - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' sources += files('acl_run_avx2.c') cflags += '-DCC_AVX2_SUPPORT' elif cc.has_argument('-mavx2') diff --git a/lib/librte_eal/common/rte_random.c b/lib/librte_eal/common/rte_random.c index b7a089ac4..b2c5416b3 100644 --- a/lib/librte_eal/common/rte_random.c +++ b/lib/librte_eal/common/rte_random.c @@ -2,7 +2,7 @@ * Copyright(c) 2019 Ericsson AB */ -#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#ifdef __RDSEED__ #include <x86intrin.h> #endif #include <stdlib.h> @@ -188,7 +188,7 @@ __rte_random_initial_seed(void) if (ge_rc == 0) return ge_seed; #endif -#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#ifdef __RDSEED__ unsigned int rdseed_low; unsigned int rdseed_high; diff --git a/lib/librte_eal/x86/include/rte_memcpy.h b/lib/librte_eal/x86/include/rte_memcpy.h index 9c67232df..008a3de67 100644 --- a/lib/librte_eal/x86/include/rte_memcpy.h +++ b/lib/librte_eal/x86/include/rte_memcpy.h @@ -45,7 +45,7 @@ extern "C" { static __rte_always_inline void * rte_memcpy(void *dst, const void *src, size_t n); -#ifdef RTE_MACHINE_CPUFLAG_AVX512F +#ifdef __AVX512F__ #define ALIGNMENT_MASK 0x3F @@ -286,7 +286,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_128_BACK63; } -#elif defined RTE_MACHINE_CPUFLAG_AVX2 +#elif defined __AVX2__ #define ALIGNMENT_MASK 0x1F @@ -479,7 +479,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_128_BACK31; } -#else /* RTE_MACHINE_CPUFLAG */ +#else /* __AVX512F__ */ #define ALIGNMENT_MASK 0x0F @@ -803,7 +803,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_64_BACK15; } -#endif /* RTE_MACHINE_CPUFLAG */ +#endif /* __AVX512F__ */ static __rte_always_inline void * rte_memcpy_aligned(void *dst, const void *src, size_t n) diff --git a/lib/librte_efd/rte_efd_x86.h b/lib/librte_efd/rte_efd_x86.h index 6c207e87d..e2f9dcca8 100644 --- a/lib/librte_efd/rte_efd_x86.h +++ b/lib/librte_efd/rte_efd_x86.h @@ -19,7 +19,7 @@ efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx, const efd_lookuptbl_t *group_lookup_table, const uint32_t hash_val_a, const uint32_t hash_val_b) { -#ifdef RTE_MACHINE_CPUFLAG_AVX2 +#ifdef __AVX2__ efd_value_t value = 0; uint32_t i = 0; __m256i vhash_val_a = _mm256_set1_epi32(hash_val_a); diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 0a6d47471..7c7ab84af 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1691,7 +1691,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, /* For match mask the first bit of every two bits indicates the match */ switch (sig_cmp_fn) { -#if defined(RTE_MACHINE_CPUFLAG_SSE2) +#if defined(__SSE2__) case RTE_HASH_COMPARE_SSE: /* Compare all signatures in the bucket */ *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c index cbcd0d440..3ea293a09 100644 --- a/lib/librte_member/rte_member_ht.c +++ b/lib/librte_member/rte_member_ht.c @@ -176,7 +176,7 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, set_id) || @@ -216,7 +216,7 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, for (i = 0; i < num_keys; i++) { switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (search_bucket_single_avx(prim_buckets[i], tmp_sig[i], buckets, &set_id[i]) || @@ -253,7 +253,7 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, &num_matches, match_per_key, set_id); @@ -296,7 +296,7 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, match_cnt_tmp = 0; switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], buckets, &match_cnt_tmp, match_per_key, @@ -357,7 +357,7 @@ try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, enum rte_member_sig_compare_function cmp_fn) { switch (cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (update_entry_search_avx(prim, sig, buckets, set_id) || update_entry_search_avx(sec, sig, buckets, diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h index 21a498ef0..74c8e3885 100644 --- a/lib/librte_member/rte_member_x86.h +++ b/lib/librte_member/rte_member_x86.h @@ -11,7 +11,7 @@ extern "C" { #include <x86intrin.h> -#if defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(__AVX2__) static inline int update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig, diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 9fd4794a9..56a0ed129 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -10,7 +10,7 @@ #include <rte_common.h> #include <rte_net_crc.h> -#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) +#if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 #elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) #define ARM64_NEON_PMULL 1 -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- app/test-pmd/macswap.c | 2 +- config/arm/meson.build | 6 ------ drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 12 ++++++------ examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- 18 files changed, 25 insertions(+), 31 deletions(-) diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index 74e2dd838..d82b39523 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -39,7 +39,7 @@ #include "testpmd.h" #if defined(RTE_ARCH_X86) #include "macswap_sse.h" -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM__NEON) #include "macswap_neon.h" #else #include "macswap.h" diff --git a/config/arm/meson.build b/config/arm/meson.build index 8728051d5..42c0c34a5 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -208,20 +208,14 @@ message(machine_args) if (cc.get_define('__ARM_NEON', args: machine_args) != '' or cc.get_define('__aarch64__', args: machine_args) != '') - dpdk_conf.set('RTE_MACHINE_CPUFLAG_NEON', 1) compile_time_cpuflags += ['RTE_CPUFLAG_NEON'] endif if cc.get_define('__ARM_FEATURE_CRC32', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_CRC32', 1) compile_time_cpuflags += ['RTE_CPUFLAG_CRC32'] endif if cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_AES', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_PMULL', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA1', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA2', 1) compile_time_cpuflags += ['RTE_CPUFLAG_AES', 'RTE_CPUFLAG_PMULL', 'RTE_CPUFLAG_SHA1', 'RTE_CPUFLAG_SHA2'] endif diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 0f065bbc0..c74467e06 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -3960,7 +3960,7 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc) return ptypes; -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) if (dev->rx_pkt_burst == ixgbe_recv_pkts_vec || dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec) return ptypes; diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 67055431f..2cf06099e 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -12,7 +12,7 @@ #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 -#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON) +#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON) #define NO_HASH_MULTI_LOOKUP 1 #endif diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 78181a640..c529dcd3e 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -28,7 +28,7 @@ #include "l3fwd.h" #include "l3fwd_event.h" -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #define EM_HASH_CRC 1 #endif @@ -223,7 +223,7 @@ em_mask_key(void *key, xmm_t mask) return _mm_and_si128(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -303,7 +303,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, void *lookup_struct) return (ret < 0) ? portid : ipv6_l3fwd_out_if[ret]; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON #if defined(NO_HASH_MULTI_LOOKUP) #include "l3fwd_em_sequential.h" #else @@ -685,7 +685,7 @@ em_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_send_packets(nb_rx, pkts_burst, portid, qconf); #else @@ -723,7 +723,7 @@ em_event_loop_single(struct l3fwd_event_resources *evt_rsrc, struct rte_mbuf *mbuf = ev.mbuf; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON mbuf->port = em_get_dst_port(lconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); #else @@ -784,7 +784,7 @@ em_event_loop_burst(struct l3fwd_event_resources *evt_rsrc, continue; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_process_events(nb_deq, (struct rte_event **)&events, lconf); #else diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 79812716c..278707c18 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -9,7 +9,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" #include "l3fwd_em_hlm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #include "l3fwd_em_hlm_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index b231b9994..6170052cf 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -16,7 +16,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 91eb74272..3dcf1fef1 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -163,7 +163,7 @@ lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, #if defined(RTE_ARCH_X86) #include "l3fwd_lpm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_lpm_neon.h" #elif defined(RTE_ARCH_PPC_64) #include "l3fwd_lpm_altivec.h" @@ -240,7 +240,7 @@ lpm_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 l3fwd_lpm_send_packets(nb_rx, pkts_burst, portid, qconf); @@ -259,7 +259,7 @@ lpm_process_event_pkt(const struct lcore_conf *lconf, struct rte_mbuf *mbuf) { mbuf->port = lpm_get_dst_port(lconf, mbuf, mbuf->port); -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 process_packet(mbuf, &mbuf->port); #else diff --git a/lib/librte_eal/arm/include/rte_memcpy_32.h b/lib/librte_eal/arm/include/rte_memcpy_32.h index eb02c3b41..fb3245b59 100644 --- a/lib/librte_eal/arm/include/rte_memcpy_32.h +++ b/lib/librte_eal/arm/include/rte_memcpy_32.h @@ -16,7 +16,7 @@ extern "C" { #ifdef RTE_ARCH_ARM_NEON_MEMCPY -#ifndef RTE_MACHINE_CPUFLAG_NEON +#ifndef __ARM_NEON #error "Cannot optimize memcpy by NEON as the CPU seems to not support this" #endif diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 7c7ab84af..aad0c965b 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1704,7 +1704,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, (__m128i const *)sec_bkt->sig_current), _mm_set1_epi16(sig))); break; -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) case RTE_HASH_COMPARE_NEON: { uint16x8_t vmat, vsig, x; int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1}; diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index cf28031b3..3e131aa6b 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -424,7 +424,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) static uint8_t crc32_alg = CRC32_SW; -#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_crc_arm64.h" #else diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 51b512946..061efa2ae 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -28,7 +28,7 @@ extern "C" { #include <rte_ip.h> #include <rte_common.h> -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) #include <rte_vect.h> #endif @@ -149,7 +149,7 @@ rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig, ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); *(__m128i *)targ->v6.dst_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr); vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6)); ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr); diff --git a/lib/librte_member/rte_member.h b/lib/librte_member/rte_member.h index ab2b23217..c0689e233 100644 --- a/lib/librte_member/rte_member.h +++ b/lib/librte_member/rte_member.h @@ -68,7 +68,7 @@ typedef uint16_t member_set_t; #define RTE_MEMBER_NAMESIZE 32 /** @internal Hash function used by membership library. */ -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #include <rte_hash_crc.h> #define MEMBER_HASH_FUNC rte_hash_crc #else diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 56a0ed129..4f5b9e828 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -12,7 +12,7 @@ #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRYPTO) #define ARM64_NEON_PMULL 1 #endif diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c index 8e6379457..293c77f39 100644 --- a/lib/librte_node/ip4_lookup.c +++ b/lib/librte_node/ip4_lookup.c @@ -30,7 +30,7 @@ struct ip4_lookup_node_main { static struct ip4_lookup_node_main ip4_lookup_nm; -#if defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(__ARM_NEON) #include "ip4_lookup_neon.h" #elif defined(RTE_ARCH_X86) #include "ip4_lookup_sse.h" diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 0fa074166..75be8b6bd 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -29,7 +29,7 @@ #ifdef RTE_ARCH_X86 #define SCHED_VECTOR_SSE4 -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) #define SCHED_VECTOR_NEON #endif diff --git a/lib/librte_table/rte_lru_arm64.h b/lib/librte_table/rte_lru_arm64.h index b45e9d03c..add889a57 100644 --- a/lib/librte_table/rte_lru_arm64.h +++ b/lib/librte_table/rte_lru_arm64.h @@ -13,7 +13,7 @@ extern "C" { #include <rte_vect.h> #ifndef RTE_TABLE_HASH_LRU_STRATEGY -#ifdef RTE_MACHINE_CPUFLAG_NEON +#ifdef __ARM_NEON #define RTE_TABLE_HASH_LRU_STRATEGY 3 #else /* if no NEON, use simple scalar version */ #define RTE_TABLE_HASH_LRU_STRATEGY 1 diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h index 350c79564..c4c35cc06 100644 --- a/lib/librte_table/rte_table_hash_func.h +++ b/lib/librte_table/rte_table_hash_func.h @@ -41,7 +41,7 @@ rte_crc32_u64(uint64_t crc, uint64_t v) return _mm_crc32_u64(crc, v); } -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_table_hash_func_arm64.h" #else -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- config/ppc/meson.build | 2 -- 1 file changed, 2 deletions(-) diff --git a/config/ppc/meson.build b/config/ppc/meson.build index aa7d73d11..0d8da87e6 100644 --- a/config/ppc/meson.build +++ b/config/ppc/meson.build @@ -21,5 +21,3 @@ endif dpdk_conf.set('RTE_MAX_LCORE', 1536) dpdk_conf.set('RTE_MAX_NUMA_NODES', 32) dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128) -dpdk_conf.set('RTE_MACHINE_CPUFLAG_ALTIVEC', 1) -dpdk_conf.set('RTE_MACHINE_CPUFLAG_VSX', 1) -- 2.17.1
RTE_MACHINE_CPUFLAG macros are replaced with predefined compiler defines. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- doc/guides/prog_guide/writing_efficient_code.rst | 2 -- doc/guides/rel_notes/release_20_11.rst | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/guides/prog_guide/writing_efficient_code.rst b/doc/guides/prog_guide/writing_efficient_code.rst index 2639ef7bf..74f8067c7 100644 --- a/doc/guides/prog_guide/writing_efficient_code.rst +++ b/doc/guides/prog_guide/writing_efficient_code.rst @@ -273,5 +273,3 @@ main() function and checks if the current machine is suitable for running the bi Along with compiler optimizations, a set of preprocessor defines are automatically added to the build process (regardless of the compiler version). These defines correspond to the instruction sets that the target CPU should be able to support. -For example, a binary compiled for any SSE4.2-capable processor will have RTE_MACHINE_CPUFLAG_SSE4_2 defined, -thus enabling compile-time code path selection for different platforms. diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index c6642f5f9..03deb89de 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -91,6 +91,7 @@ Removed Items Also, make sure to start the actual text at the margin. ======================================================= +* Removed ``RTE_MACHINE_CPUFLAG`` macros. API Changes ----------- -- 2.17.1
On Wed, Sep 23, 2020 at 12:45 PM Radu Nicolau <radu.nicolau@intel.com> wrote: > diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c > index fdbee70b4..78181a640 100644 > --- a/examples/l3fwd/l3fwd_em.c > +++ b/examples/l3fwd/l3fwd_em.c > @@ -215,7 +215,7 @@ static rte_xmm_t mask0; > static rte_xmm_t mask1; > static rte_xmm_t mask2; > > -#if defined(RTE_MACHINE_CPUFLAG_SSE2) > +#if defined(__SSE2__) > static inline xmm_t > em_mask_key(void *key, xmm_t mask) > { > @@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask) > > return vandq_s32(data, mask); > } > -#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC) > +#elif defined(__ALTIVEC__) It should be in patch 3. > static inline xmm_t > em_mask_key(void *key, xmm_t mask) > { -- David Marchand
On Wed, Sep 23, 2020 at 12:45 PM Radu Nicolau <radu.nicolau@intel.com> wrote: > diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c > index 74e2dd838..d82b39523 100644 > --- a/app/test-pmd/macswap.c > +++ b/app/test-pmd/macswap.c > @@ -39,7 +39,7 @@ > #include "testpmd.h" > #if defined(RTE_ARCH_X86) > #include "macswap_sse.h" > -#elif defined(RTE_MACHINE_CPUFLAG_NEON) > +#elif defined(__ARM__NEON) Typo for __ARM_NEON? > #include "macswap_neon.h" > #else > #include "macswap.h" -- David Marchand
Remove RTE_MACHINE_CPUFLAG_ macros from the build. Deprecation notice sent, pasted here for reference: * build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be removed from the build. The information provided by these macros is available through standard compiler macros. For example, RTE_MACHINE_CPUFLAG_SSE3 duplicates the compiler-provided macro __SSE3__. Radu Nicolau (4): x86: change cpuflag macros to compiler macros arm: change cpuflag macros to compiler macros ppc: change cpuflag macros to compiler macros doc: remove reference to RTE_MACHINE_CPUFLAG app/test-pmd/macswap.c | 2 +- app/test/test_memcpy_perf.c | 8 ++++---- config/arm/meson.build | 6 ------ config/ppc/meson.build | 2 -- config/x86/meson.build | 2 -- doc/guides/prog_guide/writing_efficient_code.rst | 2 -- doc/guides/rel_notes/release_20_11.rst | 1 + drivers/net/enic/meson.build | 2 +- drivers/net/i40e/meson.build | 2 +- drivers/net/iavf/meson.build | 2 +- drivers/net/ice/meson.build | 2 +- drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 16 ++++++++-------- examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_acl/meson.build | 2 +- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_eal/common/rte_random.c | 4 ++-- lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- lib/librte_efd/rte_efd_x86.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 4 ++-- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_member/rte_member_ht.c | 10 +++++----- lib/librte_member/rte_member_x86.h | 2 +- lib/librte_net/rte_net_crc.c | 4 ++-- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- 33 files changed, 52 insertions(+), 63 deletions(-) -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- app/test/test_memcpy_perf.c | 8 ++++---- config/x86/meson.build | 2 -- drivers/net/enic/meson.build | 2 +- drivers/net/i40e/meson.build | 2 +- drivers/net/iavf/meson.build | 2 +- drivers/net/ice/meson.build | 2 +- examples/l3fwd/l3fwd_em.c | 2 +- lib/librte_acl/meson.build | 2 +- lib/librte_eal/common/rte_random.c | 4 ++-- lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++---- lib/librte_efd/rte_efd_x86.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_member/rte_member_ht.c | 10 +++++----- lib/librte_member/rte_member_x86.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- 15 files changed, 25 insertions(+), 27 deletions(-) diff --git a/app/test/test_memcpy_perf.c b/app/test/test_memcpy_perf.c index 00a2092b4..c711e36ba 100644 --- a/app/test/test_memcpy_perf.c +++ b/app/test/test_memcpy_perf.c @@ -51,13 +51,13 @@ static size_t buf_sizes[TEST_VALUE_RANGE]; #define TEST_BATCH_SIZE 100 /* Data is aligned on this many bytes (power of 2) */ -#ifdef RTE_MACHINE_CPUFLAG_AVX512F +#ifdef __AVX512F__ #define ALIGNMENT_UNIT 64 -#elif defined RTE_MACHINE_CPUFLAG_AVX2 +#elif defined __AVX2__ #define ALIGNMENT_UNIT 32 -#else /* RTE_MACHINE_CPUFLAG */ +#else #define ALIGNMENT_UNIT 16 -#endif /* RTE_MACHINE_CPUFLAG */ +#endif /* * Pointers used in performance tests. The two large buffers are for uncached diff --git a/config/x86/meson.build b/config/x86/meson.build index 6ec020ef6..fea4d5403 100644 --- a/config/x86/meson.build +++ b/config/x86/meson.build @@ -18,7 +18,6 @@ endif base_flags = ['SSE', 'SSE2', 'SSE3','SSSE3', 'SSE4_1', 'SSE4_2'] foreach f:base_flags - dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1) compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endforeach @@ -32,7 +31,6 @@ foreach f:optional_flags elif f == 'RDRND' f = 'RDRAND' endif - dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1) compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endif endforeach diff --git a/drivers/net/enic/meson.build b/drivers/net/enic/meson.build index 7f4836d0f..86ef2a8a2 100644 --- a/drivers/net/enic/meson.build +++ b/drivers/net/enic/meson.build @@ -20,7 +20,7 @@ deps += ['hash'] includes += include_directories('base') # The current implementation assumes 64-bit pointers -if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and dpdk_conf.get('RTE_ARCH_64') +if cc.get_define('__AVX2__', args: machine_args) != '' and dpdk_conf.get('RTE_ARCH_64') sources += files('enic_rxtx_vec_avx2.c') # Build the avx2 handler if the compiler supports it, even though 'machine' # does not. This is to support users who build for the min supported machine diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index 211d45d88..68f9895cd 100644 --- a/drivers/net/i40e/meson.build +++ b/drivers/net/i40e/meson.build @@ -31,7 +31,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('i40e_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build index a3fad363d..33407c503 100644 --- a/drivers/net/iavf/meson.build +++ b/drivers/net/iavf/meson.build @@ -21,7 +21,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('iavf_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index e6fe74487..99e1b773a 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -22,7 +22,7 @@ if arch_subdir == 'x86' # compile AVX2 version if either: # a. we have AVX supported in minimum instruction set baseline # b. it's not minimum instruction set, but supported by compiler - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' sources += files('ice_rxtx_vec_avx2.c') elif cc.has_argument('-mavx2') ice_avx2_lib = static_library('ice_avx2_lib', diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index fdbee70b4..df0c8dd16 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -215,7 +215,7 @@ static rte_xmm_t mask0; static rte_xmm_t mask1; static rte_xmm_t mask2; -#if defined(RTE_MACHINE_CPUFLAG_SSE2) +#if defined(__SSE2__) static inline xmm_t em_mask_key(void *key, xmm_t mask) { diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index d1e2c184c..b31a3f798 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -15,7 +15,7 @@ if dpdk_conf.has('RTE_ARCH_X86') # in former case, just add avx2 C file to files list # in latter case, compile c file to static lib, using correct compiler # flags, and then have the .o file from static lib linked into main lib. - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') + if cc.get_define('__AVX2__', args: machine_args) != '' sources += files('acl_run_avx2.c') cflags += '-DCC_AVX2_SUPPORT' elif cc.has_argument('-mavx2') diff --git a/lib/librte_eal/common/rte_random.c b/lib/librte_eal/common/rte_random.c index b7a089ac4..b2c5416b3 100644 --- a/lib/librte_eal/common/rte_random.c +++ b/lib/librte_eal/common/rte_random.c @@ -2,7 +2,7 @@ * Copyright(c) 2019 Ericsson AB */ -#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#ifdef __RDSEED__ #include <x86intrin.h> #endif #include <stdlib.h> @@ -188,7 +188,7 @@ __rte_random_initial_seed(void) if (ge_rc == 0) return ge_seed; #endif -#ifdef RTE_MACHINE_CPUFLAG_RDSEED +#ifdef __RDSEED__ unsigned int rdseed_low; unsigned int rdseed_high; diff --git a/lib/librte_eal/x86/include/rte_memcpy.h b/lib/librte_eal/x86/include/rte_memcpy.h index 9c67232df..008a3de67 100644 --- a/lib/librte_eal/x86/include/rte_memcpy.h +++ b/lib/librte_eal/x86/include/rte_memcpy.h @@ -45,7 +45,7 @@ extern "C" { static __rte_always_inline void * rte_memcpy(void *dst, const void *src, size_t n); -#ifdef RTE_MACHINE_CPUFLAG_AVX512F +#ifdef __AVX512F__ #define ALIGNMENT_MASK 0x3F @@ -286,7 +286,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_128_BACK63; } -#elif defined RTE_MACHINE_CPUFLAG_AVX2 +#elif defined __AVX2__ #define ALIGNMENT_MASK 0x1F @@ -479,7 +479,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_128_BACK31; } -#else /* RTE_MACHINE_CPUFLAG */ +#else /* __AVX512F__ */ #define ALIGNMENT_MASK 0x0F @@ -803,7 +803,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) goto COPY_BLOCK_64_BACK15; } -#endif /* RTE_MACHINE_CPUFLAG */ +#endif /* __AVX512F__ */ static __rte_always_inline void * rte_memcpy_aligned(void *dst, const void *src, size_t n) diff --git a/lib/librte_efd/rte_efd_x86.h b/lib/librte_efd/rte_efd_x86.h index 6c207e87d..e2f9dcca8 100644 --- a/lib/librte_efd/rte_efd_x86.h +++ b/lib/librte_efd/rte_efd_x86.h @@ -19,7 +19,7 @@ efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx, const efd_lookuptbl_t *group_lookup_table, const uint32_t hash_val_a, const uint32_t hash_val_b) { -#ifdef RTE_MACHINE_CPUFLAG_AVX2 +#ifdef __AVX2__ efd_value_t value = 0; uint32_t i = 0; __m256i vhash_val_a = _mm256_set1_epi32(hash_val_a); diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 0a6d47471..7c7ab84af 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1691,7 +1691,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, /* For match mask the first bit of every two bits indicates the match */ switch (sig_cmp_fn) { -#if defined(RTE_MACHINE_CPUFLAG_SSE2) +#if defined(__SSE2__) case RTE_HASH_COMPARE_SSE: /* Compare all signatures in the bucket */ *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c index cbcd0d440..3ea293a09 100644 --- a/lib/librte_member/rte_member_ht.c +++ b/lib/librte_member/rte_member_ht.c @@ -176,7 +176,7 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, set_id) || @@ -216,7 +216,7 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, for (i = 0; i < num_keys; i++) { switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (search_bucket_single_avx(prim_buckets[i], tmp_sig[i], buckets, &set_id[i]) || @@ -253,7 +253,7 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, &num_matches, match_per_key, set_id); @@ -296,7 +296,7 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, match_cnt_tmp = 0; switch (ss->sig_cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], buckets, &match_cnt_tmp, match_per_key, @@ -357,7 +357,7 @@ try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, enum rte_member_sig_compare_function cmp_fn) { switch (cmp_fn) { -#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(RTE_ARCH_X86) && defined(__AVX2__) case RTE_MEMBER_COMPARE_AVX2: if (update_entry_search_avx(prim, sig, buckets, set_id) || update_entry_search_avx(sec, sig, buckets, diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h index 21a498ef0..74c8e3885 100644 --- a/lib/librte_member/rte_member_x86.h +++ b/lib/librte_member/rte_member_x86.h @@ -11,7 +11,7 @@ extern "C" { #include <x86intrin.h> -#if defined(RTE_MACHINE_CPUFLAG_AVX2) +#if defined(__AVX2__) static inline int update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig, diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 9fd4794a9..56a0ed129 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -10,7 +10,7 @@ #include <rte_common.h> #include <rte_net_crc.h> -#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) +#if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 #elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) #define ARM64_NEON_PMULL 1 -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- app/test-pmd/macswap.c | 2 +- config/arm/meson.build | 6 ------ drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 12 ++++++------ examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- 18 files changed, 25 insertions(+), 31 deletions(-) diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index 74e2dd838..310bca06a 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -39,7 +39,7 @@ #include "testpmd.h" #if defined(RTE_ARCH_X86) #include "macswap_sse.h" -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) #include "macswap_neon.h" #else #include "macswap.h" diff --git a/config/arm/meson.build b/config/arm/meson.build index 8728051d5..42c0c34a5 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -208,20 +208,14 @@ message(machine_args) if (cc.get_define('__ARM_NEON', args: machine_args) != '' or cc.get_define('__aarch64__', args: machine_args) != '') - dpdk_conf.set('RTE_MACHINE_CPUFLAG_NEON', 1) compile_time_cpuflags += ['RTE_CPUFLAG_NEON'] endif if cc.get_define('__ARM_FEATURE_CRC32', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_CRC32', 1) compile_time_cpuflags += ['RTE_CPUFLAG_CRC32'] endif if cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_AES', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_PMULL', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA1', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA2', 1) compile_time_cpuflags += ['RTE_CPUFLAG_AES', 'RTE_CPUFLAG_PMULL', 'RTE_CPUFLAG_SHA1', 'RTE_CPUFLAG_SHA2'] endif diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 0f065bbc0..c74467e06 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -3960,7 +3960,7 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc) return ptypes; -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) if (dev->rx_pkt_burst == ixgbe_recv_pkts_vec || dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec) return ptypes; diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 67055431f..2cf06099e 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -12,7 +12,7 @@ #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 -#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON) +#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON) #define NO_HASH_MULTI_LOOKUP 1 #endif diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index df0c8dd16..3b35fa3e5 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -28,7 +28,7 @@ #include "l3fwd.h" #include "l3fwd_event.h" -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #define EM_HASH_CRC 1 #endif @@ -223,7 +223,7 @@ em_mask_key(void *key, xmm_t mask) return _mm_and_si128(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -303,7 +303,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, void *lookup_struct) return (ret < 0) ? portid : ipv6_l3fwd_out_if[ret]; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON #if defined(NO_HASH_MULTI_LOOKUP) #include "l3fwd_em_sequential.h" #else @@ -685,7 +685,7 @@ em_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_send_packets(nb_rx, pkts_burst, portid, qconf); #else @@ -723,7 +723,7 @@ em_event_loop_single(struct l3fwd_event_resources *evt_rsrc, struct rte_mbuf *mbuf = ev.mbuf; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON mbuf->port = em_get_dst_port(lconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); #else @@ -784,7 +784,7 @@ em_event_loop_burst(struct l3fwd_event_resources *evt_rsrc, continue; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_process_events(nb_deq, (struct rte_event **)&events, lconf); #else diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 79812716c..278707c18 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -9,7 +9,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" #include "l3fwd_em_hlm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #include "l3fwd_em_hlm_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index b231b9994..6170052cf 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -16,7 +16,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 91eb74272..3dcf1fef1 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -163,7 +163,7 @@ lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, #if defined(RTE_ARCH_X86) #include "l3fwd_lpm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_lpm_neon.h" #elif defined(RTE_ARCH_PPC_64) #include "l3fwd_lpm_altivec.h" @@ -240,7 +240,7 @@ lpm_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 l3fwd_lpm_send_packets(nb_rx, pkts_burst, portid, qconf); @@ -259,7 +259,7 @@ lpm_process_event_pkt(const struct lcore_conf *lconf, struct rte_mbuf *mbuf) { mbuf->port = lpm_get_dst_port(lconf, mbuf, mbuf->port); -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 process_packet(mbuf, &mbuf->port); #else diff --git a/lib/librte_eal/arm/include/rte_memcpy_32.h b/lib/librte_eal/arm/include/rte_memcpy_32.h index eb02c3b41..fb3245b59 100644 --- a/lib/librte_eal/arm/include/rte_memcpy_32.h +++ b/lib/librte_eal/arm/include/rte_memcpy_32.h @@ -16,7 +16,7 @@ extern "C" { #ifdef RTE_ARCH_ARM_NEON_MEMCPY -#ifndef RTE_MACHINE_CPUFLAG_NEON +#ifndef __ARM_NEON #error "Cannot optimize memcpy by NEON as the CPU seems to not support this" #endif diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 7c7ab84af..aad0c965b 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1704,7 +1704,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, (__m128i const *)sec_bkt->sig_current), _mm_set1_epi16(sig))); break; -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) case RTE_HASH_COMPARE_NEON: { uint16x8_t vmat, vsig, x; int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1}; diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index cf28031b3..3e131aa6b 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -424,7 +424,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) static uint8_t crc32_alg = CRC32_SW; -#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_crc_arm64.h" #else diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 51b512946..061efa2ae 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -28,7 +28,7 @@ extern "C" { #include <rte_ip.h> #include <rte_common.h> -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) #include <rte_vect.h> #endif @@ -149,7 +149,7 @@ rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig, ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); *(__m128i *)targ->v6.dst_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr); vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6)); ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr); diff --git a/lib/librte_member/rte_member.h b/lib/librte_member/rte_member.h index ab2b23217..c0689e233 100644 --- a/lib/librte_member/rte_member.h +++ b/lib/librte_member/rte_member.h @@ -68,7 +68,7 @@ typedef uint16_t member_set_t; #define RTE_MEMBER_NAMESIZE 32 /** @internal Hash function used by membership library. */ -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #include <rte_hash_crc.h> #define MEMBER_HASH_FUNC rte_hash_crc #else diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 56a0ed129..4f5b9e828 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -12,7 +12,7 @@ #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRYPTO) #define ARM64_NEON_PMULL 1 #endif diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c index 8e6379457..293c77f39 100644 --- a/lib/librte_node/ip4_lookup.c +++ b/lib/librte_node/ip4_lookup.c @@ -30,7 +30,7 @@ struct ip4_lookup_node_main { static struct ip4_lookup_node_main ip4_lookup_nm; -#if defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(__ARM_NEON) #include "ip4_lookup_neon.h" #elif defined(RTE_ARCH_X86) #include "ip4_lookup_sse.h" diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 0fa074166..75be8b6bd 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -29,7 +29,7 @@ #ifdef RTE_ARCH_X86 #define SCHED_VECTOR_SSE4 -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) #define SCHED_VECTOR_NEON #endif diff --git a/lib/librte_table/rte_lru_arm64.h b/lib/librte_table/rte_lru_arm64.h index b45e9d03c..add889a57 100644 --- a/lib/librte_table/rte_lru_arm64.h +++ b/lib/librte_table/rte_lru_arm64.h @@ -13,7 +13,7 @@ extern "C" { #include <rte_vect.h> #ifndef RTE_TABLE_HASH_LRU_STRATEGY -#ifdef RTE_MACHINE_CPUFLAG_NEON +#ifdef __ARM_NEON #define RTE_TABLE_HASH_LRU_STRATEGY 3 #else /* if no NEON, use simple scalar version */ #define RTE_TABLE_HASH_LRU_STRATEGY 1 diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h index 350c79564..c4c35cc06 100644 --- a/lib/librte_table/rte_table_hash_func.h +++ b/lib/librte_table/rte_table_hash_func.h @@ -41,7 +41,7 @@ rte_crc32_u64(uint64_t crc, uint64_t v) return _mm_crc32_u64(crc, v); } -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_table_hash_func_arm64.h" #else -- 2.17.1
Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- config/ppc/meson.build | 2 -- examples/l3fwd/l3fwd_em.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/config/ppc/meson.build b/config/ppc/meson.build index aa7d73d11..0d8da87e6 100644 --- a/config/ppc/meson.build +++ b/config/ppc/meson.build @@ -21,5 +21,3 @@ endif dpdk_conf.set('RTE_MAX_LCORE', 1536) dpdk_conf.set('RTE_MAX_NUMA_NODES', 32) dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128) -dpdk_conf.set('RTE_MACHINE_CPUFLAG_ALTIVEC', 1) -dpdk_conf.set('RTE_MACHINE_CPUFLAG_VSX', 1) diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 3b35fa3e5..c529dcd3e 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask) return vandq_s32(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC) +#elif defined(__ALTIVEC__) static inline xmm_t em_mask_key(void *key, xmm_t mask) { -- 2.17.1
RTE_MACHINE_CPUFLAG macros are replaced with predefined compiler defines. Signed-off-by: Sean Morrissey <sean.morrissey@intel.com> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com> --- doc/guides/prog_guide/writing_efficient_code.rst | 2 -- doc/guides/rel_notes/release_20_11.rst | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/guides/prog_guide/writing_efficient_code.rst b/doc/guides/prog_guide/writing_efficient_code.rst index 2639ef7bf..74f8067c7 100644 --- a/doc/guides/prog_guide/writing_efficient_code.rst +++ b/doc/guides/prog_guide/writing_efficient_code.rst @@ -273,5 +273,3 @@ main() function and checks if the current machine is suitable for running the bi Along with compiler optimizations, a set of preprocessor defines are automatically added to the build process (regardless of the compiler version). These defines correspond to the instruction sets that the target CPU should be able to support. -For example, a binary compiled for any SSE4.2-capable processor will have RTE_MACHINE_CPUFLAG_SSE4_2 defined, -thus enabling compile-time code path selection for different platforms. diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index f377ab8e8..2d43d0d77 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -91,6 +91,7 @@ Removed Items Also, make sure to start the actual text at the margin. ======================================================= +* Removed ``RTE_MACHINE_CPUFLAG`` macros. API Changes ----------- -- 2.17.1
On Thu, Sep 24, 2020 at 10:19 AM Radu Nicolau <radu.nicolau@intel.com> wrote:
>
> Remove RTE_MACHINE_CPUFLAG_ macros from the build.
> Deprecation notice sent, pasted here for reference:
>
> * build macros: The macros defining RTE_MACHINE_CPUFLAG_* will be removed
> from the build. The information provided by these macros is available
> through standard compiler macros. For example, RTE_MACHINE_CPUFLAG_SSE3
> duplicates the compiler-provided macro __SSE3__.
>
>
> Radu Nicolau (4):
> x86: change cpuflag macros to compiler macros
> arm: change cpuflag macros to compiler macros
> ppc: change cpuflag macros to compiler macros
> doc: remove reference to RTE_MACHINE_CPUFLAG
>
> app/test-pmd/macswap.c | 2 +-
> app/test/test_memcpy_perf.c | 8 ++++----
> config/arm/meson.build | 6 ------
> config/ppc/meson.build | 2 --
> config/x86/meson.build | 2 --
> doc/guides/prog_guide/writing_efficient_code.rst | 2 --
> doc/guides/rel_notes/release_20_11.rst | 1 +
> drivers/net/enic/meson.build | 2 +-
> drivers/net/i40e/meson.build | 2 +-
> drivers/net/iavf/meson.build | 2 +-
> drivers/net/ice/meson.build | 2 +-
> drivers/net/ixgbe/ixgbe_ethdev.c | 2 +-
> examples/l3fwd/l3fwd.h | 2 +-
> examples/l3fwd/l3fwd_em.c | 16 ++++++++--------
> examples/l3fwd/l3fwd_em_hlm.h | 2 +-
> examples/l3fwd/l3fwd_em_sequential.h | 2 +-
> examples/l3fwd/l3fwd_lpm.c | 6 +++---
> lib/librte_acl/meson.build | 2 +-
> lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +-
> lib/librte_eal/common/rte_random.c | 4 ++--
> lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++----
> lib/librte_efd/rte_efd_x86.h | 2 +-
> lib/librte_hash/rte_cuckoo_hash.c | 4 ++--
> lib/librte_hash/rte_hash_crc.h | 2 +-
> lib/librte_hash/rte_thash.h | 4 ++--
> lib/librte_member/rte_member.h | 2 +-
> lib/librte_member/rte_member_ht.c | 10 +++++-----
> lib/librte_member/rte_member_x86.h | 2 +-
> lib/librte_net/rte_net_crc.c | 4 ++--
> lib/librte_node/ip4_lookup.c | 2 +-
> lib/librte_sched/rte_sched.c | 2 +-
> lib/librte_table/rte_lru_arm64.h | 2 +-
> lib/librte_table/rte_table_hash_func.h | 2 +-
> 33 files changed, 52 insertions(+), 63 deletions(-)
Acked-by: David Marchand <david.marchand@redhat.com>
Rewrote the release note update based on the deprecation notice.
Dropped the notice.
Applied, thanks.
--
David Marchand
On 9/24/20 1:18 AM, Radu Nicolau wrote:
> Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler
> macros, which are more complete than those provided by DPDK, and as such
> it allows new instruction sets to be leveraged without having to do
> extra work to set them up in DPDK.
>
> Signed-off-by: Sean Morrissey <sean.morrissey@intel.com>
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> ---
Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>