From: Thomas Monjalon <thomas@monjalon.net>
To: dev@dpdk.org
Cc: bruce.richardson@intel.com,
Konstantin Ananyev <konstantin.ananyev@huawei.com>,
Yipeng Wang <yipeng1.wang@intel.com>,
Sameh Gobriel <sameh.gobriel@intel.com>
Subject: [PATCH v2 3/4] member: remove AVX2 build-time checks
Date: Thu, 18 Sep 2025 11:08:09 +0200 [thread overview]
Message-ID: <20250918091039.1368875-4-thomas@monjalon.net> (raw)
In-Reply-To: <20250918091039.1368875-1-thomas@monjalon.net>
Since all supported compilers can generate AVX2 code,
it is possible to force AVX2 compilation on some specific functions
and remove the build-time checks for AVX2 support where the x86 arch is already checked.
The functions have to be moved into a .c file, losing inlining.
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
.../{rte_member_x86.h => member_avx2.c} | 23 ++-----
lib/member/meson.build | 2 +-
lib/member/rte_member_ht.c | 14 ++--
lib/member/rte_member_x86.h | 68 ++-----------------
4 files changed, 21 insertions(+), 86 deletions(-)
copy lib/member/{rte_member_x86.h => member_avx2.c} (87%)
diff --git a/lib/member/rte_member_x86.h b/lib/member/member_avx2.c
similarity index 87%
copy from lib/member/rte_member_x86.h
copy to lib/member/member_avx2.c
index 4de453485b..4c909d5b48 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/member_avx2.c
@@ -2,18 +2,14 @@
* Copyright(c) 2017 Intel Corporation
*/
-#ifndef _RTE_MEMBER_X86_H_
-#define _RTE_MEMBER_X86_H_
-
#include <x86intrin.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include <rte_bitops.h>
-#if defined(__AVX2__)
+#include "rte_member.h"
+#include "rte_member_x86.h"
-static inline int
+int
update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
struct member_ht_bucket *buckets,
member_set_t set_id)
@@ -29,7 +25,7 @@ update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
return 0;
}
-static inline int
+int
search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig,
struct member_ht_bucket *buckets,
member_set_t *set_id)
@@ -48,7 +44,7 @@ search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig,
return 0;
}
-static inline void
+void
search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig,
struct member_ht_bucket *buckets,
uint32_t *counter,
@@ -69,10 +65,3 @@ search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig,
hitmask &= ~(3U << ((hit_idx) << 1));
}
}
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_MEMBER_X86_H_ */
diff --git a/lib/member/meson.build b/lib/member/meson.build
index 07f9afaed9..fc08b70019 100644
--- a/lib/member/meson.build
+++ b/lib/member/meson.build
@@ -20,7 +20,7 @@ sources = files(
deps += ['hash', 'ring']
-# compile AVX512 version if we have avx512 on MSVC or the 'ifma' flag on GCC/Clang
+sources_avx2 += files('member_avx2.c')
if dpdk_conf.has('RTE_ARCH_X86_64')
if is_ms_compiler
sources_avx512 += files('rte_member_sketch_avx512.c')
diff --git a/lib/member/rte_member_ht.c b/lib/member/rte_member_ht.c
index 738471b378..0a5b206778 100644
--- a/lib/member/rte_member_ht.c
+++ b/lib/member/rte_member_ht.c
@@ -13,7 +13,7 @@
#include "rte_member.h"
#include "rte_member_ht.h"
-#if defined(RTE_ARCH_X86)
+#ifdef RTE_ARCH_X86
#include "rte_member_x86.h"
#endif
@@ -113,7 +113,7 @@ rte_member_create_ht(struct rte_member_setsum *ss,
for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
}
-#if defined(RTE_ARCH_X86)
+#ifdef RTE_ARCH_X86
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
RTE_MEMBER_BUCKET_ENTRIES == 16 &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
@@ -179,7 +179,7 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss,
get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(__AVX2__)
+#ifdef RTE_ARCH_X86
case RTE_MEMBER_COMPARE_AVX2:
if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
set_id) ||
@@ -219,7 +219,7 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
for (i = 0; i < num_keys; i++) {
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(__AVX2__)
+#ifdef RTE_ARCH_X86
case RTE_MEMBER_COMPARE_AVX2:
if (search_bucket_single_avx(prim_buckets[i],
tmp_sig[i], buckets, &set_id[i]) ||
@@ -256,7 +256,7 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(__AVX2__)
+#ifdef RTE_ARCH_X86
case RTE_MEMBER_COMPARE_AVX2:
search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
&num_matches, match_per_key, set_id);
@@ -299,7 +299,7 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
match_cnt_tmp = 0;
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(__AVX2__)
+#ifdef RTE_ARCH_X86
case RTE_MEMBER_COMPARE_AVX2:
search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
buckets, &match_cnt_tmp, match_per_key,
@@ -360,7 +360,7 @@ try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
enum rte_member_sig_compare_function cmp_fn)
{
switch (cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(__AVX2__)
+#ifdef RTE_ARCH_X86
case RTE_MEMBER_COMPARE_AVX2:
if (update_entry_search_avx(prim, sig, buckets, set_id) ||
update_entry_search_avx(sec, sig, buckets,
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index 4de453485b..dea2a9f7aa 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -5,74 +5,20 @@
#ifndef _RTE_MEMBER_X86_H_
#define _RTE_MEMBER_X86_H_
-#include <x86intrin.h>
+#include "rte_member_ht.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__AVX2__)
-
-static inline int
-update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+int update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
struct member_ht_bucket *buckets,
- member_set_t set_id)
-{
- uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
- _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
- _mm256_set1_epi16(tmp_sig)));
- if (hitmask) {
- uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
- buckets[bucket_id].sets[hit_idx] = set_id;
- return 1;
- }
- return 0;
-}
+ member_set_t set_id);
-static inline int
-search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+int search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig,
struct member_ht_bucket *buckets,
- member_set_t *set_id)
-{
- uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
- _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
- _mm256_set1_epi16(tmp_sig)));
- while (hitmask) {
- uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
- if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
- *set_id = buckets[bucket_id].sets[hit_idx];
- return 1;
- }
- hitmask &= ~(3U << ((hit_idx) << 1));
- }
- return 0;
-}
+ member_set_t *set_id);
-static inline void
-search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+void search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig,
struct member_ht_bucket *buckets,
uint32_t *counter,
uint32_t match_per_key,
- member_set_t *set_id)
-{
- uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
- _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
- _mm256_set1_epi16(tmp_sig)));
- while (hitmask) {
- uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
- if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
- set_id[*counter] = buckets[bucket_id].sets[hit_idx];
- (*counter)++;
- if (*counter >= match_per_key)
- return;
- }
- hitmask &= ~(3U << ((hit_idx) << 1));
- }
-}
-#endif
-
-#ifdef __cplusplus
-}
-#endif
+ member_set_t *set_id);
#endif /* _RTE_MEMBER_X86_H_ */
--
2.51.0
next prev parent reply other threads:[~2025-09-18 9:11 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-18 7:28 [PATCH 0/3] lib: fix AVX2 checks and macro exposure Thomas Monjalon
2025-09-18 7:28 ` [PATCH 1/3] efd: fix AVX2 support Thomas Monjalon
2025-09-18 7:48 ` Bruce Richardson
2025-09-18 8:16 ` Thomas Monjalon
2025-09-18 7:28 ` [PATCH 2/3] member: remove AVX2 build-time checks Thomas Monjalon
2025-09-18 7:49 ` Bruce Richardson
2025-09-18 7:28 ` [PATCH 3/3] member: hide internal macro Thomas Monjalon
2025-09-18 7:50 ` Bruce Richardson
2025-09-18 8:10 ` [PATCH 0/3] lib: fix AVX2 checks and macro exposure Thomas Monjalon
2025-09-18 8:59 ` Bruce Richardson
2025-09-18 9:08 ` [PATCH v2 0/4] " Thomas Monjalon
2025-09-18 9:08 ` [PATCH v2 1/4] efd: fix AVX2 support Thomas Monjalon
2025-09-18 9:40 ` Thomas Monjalon
2025-09-18 9:47 ` Thomas Monjalon
2025-09-18 9:08 ` [PATCH v2 2/4] efd: remove AVX2 build-time check Thomas Monjalon
2025-09-18 9:08 ` Thomas Monjalon [this message]
2025-09-18 9:08 ` [PATCH v2 4/4] member: hide internal macro Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250918091039.1368875-4-thomas@monjalon.net \
--to=thomas@monjalon.net \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=konstantin.ananyev@huawei.com \
--cc=sameh.gobriel@intel.com \
--cc=yipeng1.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).