* [RFC 0/3] hash: optimize compare logic @ 2025-08-21 20:35 Stephen Hemminger 2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger ` (3 more replies) 0 siblings, 4 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger Recent discussion around handling small keys motivated further examination of the compare logic. Stephen Hemminger (3): hash: move table of hash compare functions out of header hash: reduce architecture special cases hash: add support for common small key sizes lib/hash/rte_cmp_arm64.h | 52 ---------- lib/hash/rte_cmp_x86.h | 56 +---------- lib/hash/rte_cuckoo_hash.c | 194 +++++++++++++++++++++++++++++++++++-- lib/hash/rte_cuckoo_hash.h | 79 +-------------- 4 files changed, 187 insertions(+), 194 deletions(-) -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC 1/3] hash: move table of hash compare functions out of header 2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger @ 2025-08-21 20:35 ` Stephen Hemminger 2025-08-22 9:05 ` Morten Brørup 2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger ` (2 subsequent siblings) 3 siblings, 1 reply; 16+ messages in thread From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Remove the definition of the compare jump table from the header file so the internal details are not exposed. Prevents future ABI breakage if new sizes are added. Make other macros local if possible, header should only contain exposed API. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/hash/rte_cuckoo_hash.c | 74 ++++++++++++++++++++++++++++++----- lib/hash/rte_cuckoo_hash.h | 79 +------------------------------------- 2 files changed, 65 insertions(+), 88 deletions(-) diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 2c92c51624..619fe0c691 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -25,14 +25,51 @@ #include <rte_tailq.h> #include "rte_hash.h" +#include "rte_cuckoo_hash.h" -/* needs to be before rte_cuckoo_hash.h */ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); #define RTE_LOGTYPE_HASH hash_logtype #define HASH_LOG(level, ...) 
\ RTE_LOG_LINE(level, HASH, "" __VA_ARGS__) -#include "rte_cuckoo_hash.h" +/* Macro to enable/disable run-time checking of function parameters */ +#if defined(RTE_LIBRTE_HASH_DEBUG) +#define RETURN_IF_TRUE(cond, retval) do { \ + if (cond) \ + return retval; \ +} while (0) +#else +#define RETURN_IF_TRUE(cond, retval) +#endif + +#if defined(RTE_ARCH_X86) +#include "rte_cmp_x86.h" +#endif + +#if defined(RTE_ARCH_ARM64) +#include "rte_cmp_arm64.h" +#endif + +/* + * All different options to select a key compare function, + * based on the key size and custom function. + * Not in rte_cuckoo_hash.h to avoid ABI issues. + */ +enum cmp_jump_table_case { + KEY_CUSTOM = 0, +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + KEY_16_BYTES, + KEY_32_BYTES, + KEY_48_BYTES, + KEY_64_BYTES, + KEY_80_BYTES, + KEY_96_BYTES, + KEY_112_BYTES, + KEY_128_BYTES, +#endif + KEY_OTHER_BYTES, + NUM_KEY_CMP_CASES, +}; /* Enum used to select the implementation of the signature comparison function to use * eg: a system supporting SVE might want to use a NEON or scalar implementation. 
@@ -117,6 +154,25 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) h->rte_hash_custom_cmp_eq = func; } +/* + * Table storing all different key compare functions + * (multi-process supported) + */ +static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { + [KEY_CUSTOM] = NULL, +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + [KEY_16_BYTES] = rte_hash_k16_cmp_eq, + [KEY_32_BYTES] = rte_hash_k32_cmp_eq, + [KEY_48_BYTES] = rte_hash_k48_cmp_eq, + [KEY_64_BYTES] = rte_hash_k64_cmp_eq, + [KEY_80_BYTES] = rte_hash_k80_cmp_eq, + [KEY_96_BYTES] = rte_hash_k96_cmp_eq, + [KEY_112_BYTES] = rte_hash_k112_cmp_eq, + [KEY_128_BYTES] = rte_hash_k128_cmp_eq, +#endif + [KEY_OTHER_BYTES] = memcmp, +}; + static inline int rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h) { @@ -390,13 +446,13 @@ rte_hash_create(const struct rte_hash_parameters *params) goto err_unlock; } -/* - * If x86 architecture is used, select appropriate compare function, - * which may use x86 intrinsics, otherwise use memcmp - */ -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) /* Select function to compare keys */ switch (params->key_len) { +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + /* + * If x86 architecture is used, select appropriate compare function, + * which may use x86 intrinsics, otherwise use memcmp + */ case 16: h->cmp_jump_table_idx = KEY_16_BYTES; break; @@ -421,13 +477,11 @@ rte_hash_create(const struct rte_hash_parameters *params) case 128: h->cmp_jump_table_idx = KEY_128_BYTES; break; +#endif default: /* If key is not multiple of 16, use generic memcmp */ h->cmp_jump_table_idx = KEY_OTHER_BYTES; } -#else - h->cmp_jump_table_idx = KEY_OTHER_BYTES; -#endif if (use_local_cache) { local_free_slots = rte_zmalloc_socket(NULL, diff --git a/lib/hash/rte_cuckoo_hash.h b/lib/hash/rte_cuckoo_hash.h index 26a992419a..16fe999c4c 100644 --- a/lib/hash/rte_cuckoo_hash.h +++ b/lib/hash/rte_cuckoo_hash.h @@ -12,86 +12,9 @@ 
#define _RTE_CUCKOO_HASH_H_ #include <stdalign.h> - -#if defined(RTE_ARCH_X86) -#include "rte_cmp_x86.h" -#endif - -#if defined(RTE_ARCH_ARM64) -#include "rte_cmp_arm64.h" -#endif - -/* Macro to enable/disable run-time checking of function parameters */ -#if defined(RTE_LIBRTE_HASH_DEBUG) -#define RETURN_IF_TRUE(cond, retval) do { \ - if (cond) \ - return retval; \ -} while (0) -#else -#define RETURN_IF_TRUE(cond, retval) -#endif - #include <rte_hash_crc.h> #include <rte_jhash.h> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) -/* - * All different options to select a key compare function, - * based on the key size and custom function. - */ -enum cmp_jump_table_case { - KEY_CUSTOM = 0, - KEY_16_BYTES, - KEY_32_BYTES, - KEY_48_BYTES, - KEY_64_BYTES, - KEY_80_BYTES, - KEY_96_BYTES, - KEY_112_BYTES, - KEY_128_BYTES, - KEY_OTHER_BYTES, - NUM_KEY_CMP_CASES, -}; - -/* - * Table storing all different key compare functions - * (multi-process supported) - */ -const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { - NULL, - rte_hash_k16_cmp_eq, - rte_hash_k32_cmp_eq, - rte_hash_k48_cmp_eq, - rte_hash_k64_cmp_eq, - rte_hash_k80_cmp_eq, - rte_hash_k96_cmp_eq, - rte_hash_k112_cmp_eq, - rte_hash_k128_cmp_eq, - memcmp -}; -#else -/* - * All different options to select a key compare function, - * based on the key size and custom function. - */ -enum cmp_jump_table_case { - KEY_CUSTOM = 0, - KEY_OTHER_BYTES, - NUM_KEY_CMP_CASES, -}; - -/* - * Table storing all different key compare functions - * (multi-process supported) - */ -const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { - NULL, - memcmp -}; - -#endif - - /** * Number of items per bucket. * 8 is a tradeoff between performance and memory consumption. @@ -189,7 +112,7 @@ struct __rte_cache_aligned rte_hash { uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< Custom function used to compare keys. 
*/ - enum cmp_jump_table_case cmp_jump_table_idx; + unsigned int cmp_jump_table_idx; /**< Indicates which compare function to use. */ unsigned int sig_cmp_fn; /**< Indicates which signature compare function to use. */ -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [RFC 1/3] hash: move table of hash compare functions out of header 2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger @ 2025-08-22 9:05 ` Morten Brørup 0 siblings, 0 replies; 16+ messages in thread From: Morten Brørup @ 2025-08-22 9:05 UTC (permalink / raw) To: Stephen Hemminger, dev Cc: Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin > From: Stephen Hemminger [mailto:stephen@networkplumber.org] > Sent: Thursday, 21 August 2025 22.35 > > Remove the definition of the compare jump table from the > header file so the internal details are not exposed. > Prevents future ABI breakage if new sizes are added. > > Make other macros local if possible, header should > only contain exposed API. > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > --- [...] > +/* > + * All different options to select a key compare function, > + * based on the key size and custom function. > + * Not in rte_cuckoo_hash.h to avoid ABI issues. > + */ > +enum cmp_jump_table_case { > + KEY_CUSTOM = 0, > +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) > + KEY_16_BYTES, > + KEY_32_BYTES, > + KEY_48_BYTES, > + KEY_64_BYTES, > + KEY_80_BYTES, > + KEY_96_BYTES, > + KEY_112_BYTES, > + KEY_128_BYTES, > +#endif > + KEY_OTHER_BYTES, > + NUM_KEY_CMP_CASES, > +}; > > /* Enum used to select the implementation of the signature comparison > function to use > * eg: a system supporting SVE might want to use a NEON or scalar > implementation. 
> @@ -117,6 +154,25 @@ void rte_hash_set_cmp_func(struct rte_hash *h, > rte_hash_cmp_eq_t func) > h->rte_hash_custom_cmp_eq = func; > } > > +/* > + * Table storing all different key compare functions > + * (multi-process supported) > + */ > +static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { > + [KEY_CUSTOM] = NULL, > +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) > + [KEY_16_BYTES] = rte_hash_k16_cmp_eq, > + [KEY_32_BYTES] = rte_hash_k32_cmp_eq, > + [KEY_48_BYTES] = rte_hash_k48_cmp_eq, > + [KEY_64_BYTES] = rte_hash_k64_cmp_eq, > + [KEY_80_BYTES] = rte_hash_k80_cmp_eq, > + [KEY_96_BYTES] = rte_hash_k96_cmp_eq, > + [KEY_112_BYTES] = rte_hash_k112_cmp_eq, > + [KEY_128_BYTES] = rte_hash_k128_cmp_eq, > +#endif > + [KEY_OTHER_BYTES] = memcmp, > +}; Nice trick explicitly indexing these here; it reduces the risk of not matching the cmp_jump_table_case. Consider adding static_assert() that RTE_DIM(cmp_jump_table) == NUM_KEY_CMP_CASES. With or without suggested static_assert()... Acked-by: Morten Brørup <mb@smartsharesystems.com> Good cleanup! ^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC 2/3] hash: reduce architecture special cases 2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger 2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger @ 2025-08-21 20:35 ` Stephen Hemminger 2025-08-22 9:20 ` Morten Brørup 2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger 3 siblings, 1 reply; 16+ messages in thread From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Wathsala Vithanage, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Make comparison of power of 2 sizes compatible across platforms. Keep the special case code for 16 bytes for x86 and arm64 but also add simple xor for others. Need to make rte_hash_k32_cmp_eq() exposed because ip_frag code poaches it. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/hash/rte_cmp_arm64.h | 52 ---------------------- lib/hash/rte_cmp_x86.h | 56 +----------------------- lib/hash/rte_cuckoo_hash.c | 90 +++++++++++++++++++++++++++++++++----- 3 files changed, 80 insertions(+), 118 deletions(-) diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h index a3e85635eb..c7c56526d0 100644 --- a/lib/hash/rte_cmp_arm64.h +++ b/lib/hash/rte_cmp_arm64.h @@ -31,55 +31,3 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) rte_hash_k16_cmp_eq((const char *) key1 + 16, (const char *) key2 + 16, key_len); } - -static inline int -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ 
- return rte_hash_k32_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 96, - (const char *) key2 + 96, key_len); -} - -static inline int -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k64_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h index ddfbef462f..62ac320b25 100644 --- a/lib/hash/rte_cmp_x86.h +++ b/lib/hash/rte_cmp_x86.h @@ -19,58 +19,6 @@ static inline int rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) { return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len); -} - -static inline int -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k64_cmp_eq(const void *key1, const void *key2, 
size_t key_len) -{ - return rte_hash_k32_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 96, - (const char *) key2 + 96, key_len); -} - -static inline int -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k64_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16, + (const uint8_t *) key2 + 16, key_len); } diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 619fe0c691..3212695d92 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -42,14 +42,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); #define RETURN_IF_TRUE(cond, retval) #endif -#if defined(RTE_ARCH_X86) -#include "rte_cmp_x86.h" -#endif - -#if defined(RTE_ARCH_ARM64) -#include "rte_cmp_arm64.h" -#endif - /* * All different options to select a key compare function, * based on the key size and custom function. 
@@ -57,7 +49,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); */ enum cmp_jump_table_case { KEY_CUSTOM = 0, -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) KEY_16_BYTES, KEY_32_BYTES, KEY_48_BYTES, @@ -66,11 +57,88 @@ enum cmp_jump_table_case { KEY_96_BYTES, KEY_112_BYTES, KEY_128_BYTES, -#endif KEY_OTHER_BYTES, NUM_KEY_CMP_CASES, }; + +#if defined(RTE_ARCH_X86) +#include "rte_cmp_x86.h" +#elif defined(RTE_ARCH_ARM64) +#include "rte_cmp_arm64.h" +#else + +static inline int +rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint64_t *k1 = key1; + const uint64_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]); +} + +static inline int +rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16, + (const uint8_t *) key2 + 16, key_len); +} +#endif + + +static inline int +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16, + (const uint8_t *) key2 + 16, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 32, + (const uint8_t *) key2 + 32, key_len); +} + +static inline int +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k32_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 32, + (const uint8_t *) key2 + 32, key_len); +} + +static inline int +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len); +} + +static inline int +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, 
key_len); +} + +static inline int +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 96, + (const uint8_t *) key2 + 96, key_len); +} + +static inline int +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k64_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len); +} + /* Enum used to select the implementation of the signature comparison function to use * eg: a system supporting SVE might want to use a NEON or scalar implementation. */ @@ -160,7 +228,6 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) */ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { [KEY_CUSTOM] = NULL, -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) [KEY_16_BYTES] = rte_hash_k16_cmp_eq, [KEY_32_BYTES] = rte_hash_k32_cmp_eq, [KEY_48_BYTES] = rte_hash_k48_cmp_eq, @@ -169,7 +236,6 @@ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { [KEY_96_BYTES] = rte_hash_k96_cmp_eq, [KEY_112_BYTES] = rte_hash_k112_cmp_eq, [KEY_128_BYTES] = rte_hash_k128_cmp_eq, -#endif [KEY_OTHER_BYTES] = memcmp, }; -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [RFC 2/3] hash: reduce architecture special cases 2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger @ 2025-08-22 9:20 ` Morten Brørup 0 siblings, 0 replies; 16+ messages in thread From: Morten Brørup @ 2025-08-22 9:20 UTC (permalink / raw) To: Stephen Hemminger, dev Cc: Wathsala Vithanage, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin, Mattias Rönnblom > From: Stephen Hemminger [mailto:stephen@networkplumber.org] > Sent: Thursday, 21 August 2025 22.35 > > Make comparison of power of 2 sizes compatible across platforms. > Keep the special case code for 16 bytes for x86 and arm64 but > also add simple xor for others. > > Need to make rte_hash_k32_cmp_eq() exposed because ip_frag > code poaches it. One more argument why these architecture optimized compare functions should be exposed as generic EAL functions, like rte_mov16() etc. for rte_memcpy(), instead of hiding them away in the hash library. It could be rte_memeq16(const void *, const void *) etc. in /lib/eal/include, with architecture optimized variants, and a generic variant. And a wrapper calling them in the hash library, such as: static inline int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) { return !rte_memeq16(key1, key2); } > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > --- With or without suggested feature creep... Acked-by: Morten Brørup <mb@smartsharesystems.com> ^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC 3/3] hash: add support for common small key sizes 2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger 2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger 2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger @ 2025-08-21 20:35 ` Stephen Hemminger 2025-08-22 7:19 ` Mattias Rönnblom 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger 3 siblings, 1 reply; 16+ messages in thread From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Morten Brørup, Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Add new compare functions for common small key sizes. Bugzilla ID: 1775 Suggested-by: Morten Brørup <mb@smartsharesystems.com> Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 3212695d92..825889c320 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); */ enum cmp_jump_table_case { KEY_CUSTOM = 0, + KEY_2_BYTES, + KEY_4_BYTES, + KEY_6_BYTES, + KEY_8_BYTES, + KEY_12_BYTES, KEY_16_BYTES, KEY_32_BYTES, KEY_48_BYTES, @@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) } #endif +static inline int +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint16_t *k1 = key1; + const uint16_t *k2 = key2; + + return k1[0] ^ k2[0]; +} + +static inline int +rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint32_t *k1 = key1; + const uint32_t *k2 = key2; + + return k1[0] ^ k2[0]; +} + +static inline int 
+rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint16_t *k1 = key1; + const uint16_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); +} + +static inline int +rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint64_t *k1 = key1; + const uint64_t *k2 = key2; + + return k1[0] ^ k2[0]; +} + +static inline int +rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint32_t *k1 = key1; + const uint32_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); +} static inline int rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) @@ -228,6 +277,11 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) */ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { [KEY_CUSTOM] = NULL, + [KEY_2_BYTES] = rte_hash_k2_cmp_eq, + [KEY_4_BYTES] = rte_hash_k4_cmp_eq, + [KEY_6_BYTES] = rte_hash_k6_cmp_eq, + [KEY_8_BYTES] = rte_hash_k8_cmp_eq, + [KEY_12_BYTES] = rte_hash_k12_cmp_eq, [KEY_16_BYTES] = rte_hash_k16_cmp_eq, [KEY_32_BYTES] = rte_hash_k32_cmp_eq, [KEY_48_BYTES] = rte_hash_k48_cmp_eq, -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC 3/3] hash: add support for common small key sizes 2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger @ 2025-08-22 7:19 ` Mattias Rönnblom 2025-08-22 9:50 ` Morten Brørup 2025-08-22 16:12 ` Stephen Hemminger 0 siblings, 2 replies; 16+ messages in thread From: Mattias Rönnblom @ 2025-08-22 7:19 UTC (permalink / raw) To: Stephen Hemminger, dev Cc: Morten Brørup, Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin On 2025-08-21 22:35, Stephen Hemminger wrote: > Add new compare functions for common small key sizes. > > Bugzilla ID: 1775 > Suggested-by: Morten Brørup <mb@smartsharesystems.com> > Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > --- > lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++ > 1 file changed, 54 insertions(+) > > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c > index 3212695d92..825889c320 100644 > --- a/lib/hash/rte_cuckoo_hash.c > +++ b/lib/hash/rte_cuckoo_hash.c > @@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); > */ > enum cmp_jump_table_case { > KEY_CUSTOM = 0, > + KEY_2_BYTES, > + KEY_4_BYTES, > + KEY_6_BYTES, > + KEY_8_BYTES, > + KEY_12_BYTES, > KEY_16_BYTES, > KEY_32_BYTES, > KEY_48_BYTES, > @@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) > } > #endif > > +static inline int > +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > +{ > + const uint16_t *k1 = key1; > + const uint16_t *k2 = key2; > + What we do now is to require the keys are 16-bit aligned (which wasn't the case before). You could uint16_t k1; memcpy(&k1, key1, sizeof(uint16_t)); instead. Would generate the same code, but be safe from any future alignment issues. Anyway, maybe it's safe to assume the keys are aligned, so this is not an issue. 
> + return k1[0] ^ k2[0]; > +} Haven't you implemented "neq" rather than "eq" here? If the keys are equal, the result is 0. Should be != 0. Would it be worth adding a comment like "use XOR to make this branch-free"? It may not be obvious to all readers. That said, I’m not sure this trick will actually change the generated object code - especially if the result of the eq function is still used in a conditional afterward. Anyway, keeping it seems like a good conservative approach. > + > +static inline int > +rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > +{ > + const uint32_t *k1 = key1; > + const uint32_t *k2 = key2; > + > + return k1[0] ^ k2[0]; > +} > + > +static inline int > +rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > +{ > + const uint16_t *k1 = key1; > + const uint16_t *k2 = key2; > + > + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); > +} > + > +static inline int > +rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > +{ > + const uint64_t *k1 = key1; > + const uint64_t *k2 = key2; > + > + return k1[0] ^ k2[0]; > +} > + > +static inline int > +rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > +{ > + const uint32_t *k1 = key1; > + const uint32_t *k2 = key2; > + > + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); > +} > > static inline int > rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) > @@ -228,6 +277,11 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) > */ > static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { > [KEY_CUSTOM] = NULL, > + [KEY_2_BYTES] = rte_hash_k2_cmp_eq, > + [KEY_4_BYTES] = rte_hash_k4_cmp_eq, > + [KEY_6_BYTES] = rte_hash_k6_cmp_eq, > + [KEY_8_BYTES] = rte_hash_k8_cmp_eq, > + [KEY_12_BYTES] = rte_hash_k12_cmp_eq, > [KEY_16_BYTES] = rte_hash_k16_cmp_eq, > [KEY_32_BYTES] = rte_hash_k32_cmp_eq, > 
[KEY_48_BYTES] = rte_hash_k48_cmp_eq, ^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [RFC 3/3] hash: add support for common small key sizes 2025-08-22 7:19 ` Mattias Rönnblom @ 2025-08-22 9:50 ` Morten Brørup 2025-08-22 15:05 ` Mattias Rönnblom 2025-08-22 16:12 ` Stephen Hemminger 1 sibling, 1 reply; 16+ messages in thread From: Morten Brørup @ 2025-08-22 9:50 UTC (permalink / raw) To: Mattias Rönnblom, Stephen Hemminger, dev Cc: Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin > From: Mattias Rönnblom [mailto:hofors@lysator.liu.se] > Sent: Friday, 22 August 2025 09.20 > > On 2025-08-21 22:35, Stephen Hemminger wrote: > > Add new compare functions for common small key sizes. > > > > Bugzilla ID: 1775 > > Suggested-by: Morten Brørup <mb@smartsharesystems.com> > > Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> > > > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > > --- > > lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 54 insertions(+) > > > > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c > > index 3212695d92..825889c320 100644 > > --- a/lib/hash/rte_cuckoo_hash.c > > +++ b/lib/hash/rte_cuckoo_hash.c > > @@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); > > */ > > enum cmp_jump_table_case { > > KEY_CUSTOM = 0, > > + KEY_2_BYTES, > > + KEY_4_BYTES, > > + KEY_6_BYTES, > > + KEY_8_BYTES, > > + KEY_12_BYTES, While you are at it, consider adding handlers for 3, 5, 10 and 14 bytes too. > > KEY_16_BYTES, > > KEY_32_BYTES, > > KEY_48_BYTES, > > @@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, > size_t key_len) > > } > > #endif > > > > +static inline int > > +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len > __rte_unused) > > +{ > > + const uint16_t *k1 = key1; > > + const uint16_t *k2 = key2; > > + > > What we do now is to require the keys are 16-bit aligned (which wasn't > the case before). 
> > You could > > uint16_t k1; > memcpy(&k1, key1, sizeof(uint16_t)); > instead. > > Would generate the same code, but be safe from any future alignment issues. Or use the unaligned types, e.g.: const unaligned_uint16_t *k1 = key1; const unaligned_uint16_t *k2 = key2; > > Anyway, maybe it's safe to assume the keys are aligned, so this is not > an issue. Lots of DPDK code ignores alignment issues. > > > + return k1[0] ^ k2[0]; > > +} > > Haven't you implemented "neq" rather than "eq" here? If the keys are > equal, the result is 0. Should be != 0. Not a bug. These hash compare functions are in fact "neq", not "eq". Having "_cmp_eq" in the function names is extremely unfortunate and misleading. > > Would it be worth adding a comment like "use XOR to make this > branch-free"? It may not be obvious to all readers. > > That said, I’m not sure this trick will actually change the generated > object code - especially if the result of the eq function is still used > in a conditional afterward. Anyway, keeping it seems like a good > conservative approach. I wonder if any compiler is clever enough to use a different memcmp implementation if we inform the compiler at build time that we don't care if key1 is less than or greater key2, only if they differ or not. 
If so, the OTHER_BYTES handler shouldn't call memcmp() directly, but a wrapper around it: rte_hash_k_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) { return memcmp(key1, key2, key_len) != 0; } > > > + > > +static inline int > > +rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len > __rte_unused) > > +{ > > + const uint32_t *k1 = key1; > > + const uint32_t *k2 = key2; > > + > > + return k1[0] ^ k2[0]; > > +} > > + > > +static inline int > > +rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len > __rte_unused) > > +{ > > + const uint16_t *k1 = key1; > > + const uint16_t *k2 = key2; > > + > > + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); > > +} > > + > > +static inline int > > +rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len > __rte_unused) > > +{ > > + const uint64_t *k1 = key1; > > + const uint64_t *k2 = key2; > > + > > + return k1[0] ^ k2[0]; I think this has a truncation bug when converting from uint64_t to int return type. Also consider if 32 bit architecture need a different implementation. (Don't know, just raising a potential flag.) 
> > +} > > + > > +static inline int > > +rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len > __rte_unused) > > +{ > > + const uint32_t *k1 = key1; > > + const uint32_t *k2 = key2; > > + > > + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); > > +} > > > > static inline int > > rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) > > @@ -228,6 +277,11 @@ void rte_hash_set_cmp_func(struct rte_hash *h, > rte_hash_cmp_eq_t func) > > */ > > static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { > > [KEY_CUSTOM] = NULL, > > + [KEY_2_BYTES] = rte_hash_k2_cmp_eq, > > + [KEY_4_BYTES] = rte_hash_k4_cmp_eq, > > + [KEY_6_BYTES] = rte_hash_k6_cmp_eq, > > + [KEY_8_BYTES] = rte_hash_k8_cmp_eq, > > + [KEY_12_BYTES] = rte_hash_k12_cmp_eq, > > [KEY_16_BYTES] = rte_hash_k16_cmp_eq, > > [KEY_32_BYTES] = rte_hash_k32_cmp_eq, > > [KEY_48_BYTES] = rte_hash_k48_cmp_eq, ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC 3/3] hash: add support for common small key sizes 2025-08-22 9:50 ` Morten Brørup @ 2025-08-22 15:05 ` Mattias Rönnblom 2025-08-22 18:57 ` Morten Brørup 0 siblings, 1 reply; 16+ messages in thread From: Mattias Rönnblom @ 2025-08-22 15:05 UTC (permalink / raw) To: Morten Brørup, Stephen Hemminger, dev Cc: Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin On 2025-08-22 11:50, Morten Brørup wrote: >> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se] >> Sent: Friday, 22 August 2025 09.20 >> >> On 2025-08-21 22:35, Stephen Hemminger wrote: >>> Add new compare functions for common small key sizes. >>> >>> Bugzilla ID: 1775 >>> Suggested-by: Morten Brørup <mb@smartsharesystems.com> >>> Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> >>> >>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> >>> --- >>> lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 54 insertions(+) >>> >>> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c >>> index 3212695d92..825889c320 100644 >>> --- a/lib/hash/rte_cuckoo_hash.c >>> +++ b/lib/hash/rte_cuckoo_hash.c >>> @@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); >>> */ >>> enum cmp_jump_table_case { >>> KEY_CUSTOM = 0, >>> + KEY_2_BYTES, >>> + KEY_4_BYTES, >>> + KEY_6_BYTES, >>> + KEY_8_BYTES, >>> + KEY_12_BYTES, > > While you are at it, consider adding handlers for 3, 5, 10 and 14 bytes too. > >>> KEY_16_BYTES, >>> KEY_32_BYTES, >>> KEY_48_BYTES, >>> @@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, >> size_t key_len) >>> } >>> #endif >>> >>> +static inline int >>> +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len >> __rte_unused) >>> +{ >>> + const uint16_t *k1 = key1; >>> + const uint16_t *k2 = key2; >>> + >> >> What we do now is to require the keys are 16-bit aligned (which wasn't >> the case before). 
>> >> You could >> >> uint16_t k1; >> memcpy(&k1, key1, sizeof(uint16_t)); >> instead. >> >> Would generate the same code, but be safe from any future alignment issues. > > Or use the unaligned types, e.g.: > const unaligned_uint16_t *k1 = key1; > const unaligned_uint16_t *k2 = key2; > Could you explain why that is safe? Doesn't __attribute__((__aligned__(1))) just say specify the object doesn't have any alignment requirements, without asking the compiler to deal with it? >> >> Anyway, maybe it's safe to assume the keys are aligned, so this is not >> an issue. > > Lots of DPDK code ignores alignment issues. > >> >>> + return k1[0] ^ k2[0]; >>> +} >> >> Haven't you implemented "neq" rather than "eq" here? If the keys are >> equal, the result is 0. Should be != 0. > > Not a bug. Well, the function body doesn't do what the function name tells it. :) > These hash compare functions are in fact "neq", not "eq". > Having "_cmp_eq" in the function names is extremely unfortunate and misleading. > >> >> Would it be worth adding a comment like "use XOR to make this >> branch-free"? It may not be obvious to all readers. >> >> That said, I’m not sure this trick will actually change the generated >> object code - especially if the result of the eq function is still used >> in a conditional afterward. Anyway, keeping it seems like a good >> conservative approach. > > I wonder if any compiler is clever enough to use a different memcmp implementation if we inform the compiler at build time that we don't care if key1 is less than or greater key2, only if they differ or not. All what is needed is a constant-size length. (Only tested with the most recent GCC and clang.) At least GCC will emit a cmp instruction though (so not branch free), if that matters. 
> If so, the OTHER_BYTES handler shouldn't call memcmp() directly, but a wrapper around it: > > rte_hash_k_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > { > return memcmp(key1, key2, key_len) != 0; > } > Should work. (Remove __rte_unused.) >> >>> + >>> +static inline int >>> +rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len >> __rte_unused) >>> +{ >>> + const uint32_t *k1 = key1; >>> + const uint32_t *k2 = key2; >>> + >>> + return k1[0] ^ k2[0]; >>> +} >>> + >>> +static inline int >>> +rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len >> __rte_unused) >>> +{ >>> + const uint16_t *k1 = key1; >>> + const uint16_t *k2 = key2; >>> + >>> + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); >>> +} >>> + >>> +static inline int >>> +rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len >> __rte_unused) >>> +{ >>> + const uint64_t *k1 = key1; >>> + const uint64_t *k2 = key2; >>> + >>> + return k1[0] ^ k2[0]; > > I think this has a truncation bug when converting from uint64_t to int return type. > Also consider if 32 bit architecture need a different implementation. (Don't know, just raising a potential flag.) 
> >>> +} >>> + >>> +static inline int >>> +rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len >> __rte_unused) >>> +{ >>> + const uint32_t *k1 = key1; >>> + const uint32_t *k2 = key2; >>> + >>> + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); >>> +} >>> >>> static inline int >>> rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) >>> @@ -228,6 +277,11 @@ void rte_hash_set_cmp_func(struct rte_hash *h, >> rte_hash_cmp_eq_t func) >>> */ >>> static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { >>> [KEY_CUSTOM] = NULL, >>> + [KEY_2_BYTES] = rte_hash_k2_cmp_eq, >>> + [KEY_4_BYTES] = rte_hash_k4_cmp_eq, >>> + [KEY_6_BYTES] = rte_hash_k6_cmp_eq, >>> + [KEY_8_BYTES] = rte_hash_k8_cmp_eq, >>> + [KEY_12_BYTES] = rte_hash_k12_cmp_eq, >>> [KEY_16_BYTES] = rte_hash_k16_cmp_eq, >>> [KEY_32_BYTES] = rte_hash_k32_cmp_eq, >>> [KEY_48_BYTES] = rte_hash_k48_cmp_eq, > ^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [RFC 3/3] hash: add support for common small key sizes 2025-08-22 15:05 ` Mattias Rönnblom @ 2025-08-22 18:57 ` Morten Brørup 0 siblings, 0 replies; 16+ messages in thread From: Morten Brørup @ 2025-08-22 18:57 UTC (permalink / raw) To: Mattias Rönnblom, Stephen Hemminger, dev Cc: Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin > >>> +static inline int > >>> +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len > >> __rte_unused) > >>> +{ > >>> + const uint16_t *k1 = key1; > >>> + const uint16_t *k2 = key2; > >>> + > >> > >> What we do now is to require the keys are 16-bit aligned (which wasn't > >> the case before). > >> > >> You could > >> > >> uint16_t k1; > >> memcpy(&k1, key1, sizeof(uint16_t)); > >> instead. > >> > >> Would generate the same code, but be safe from any future alignment issues. > > > > Or use the unaligned types, e.g.: > > const unaligned_uint16_t *k1 = key1; > > const unaligned_uint16_t *k2 = key2; > > > > Could you explain why that is safe? Doesn't > __attribute__((__aligned__(1))) > just say specify the object doesn't have any alignment requirements, > without asking the compiler to deal with it? It is safe because the compiler does deal with it. Here's a simple example: https://godbolt.org/z/r39zdeEcx I don't know how MSVC deals with it, but it doesn't support alignment sensitive architectures, so no real problem. > > >> > >> Anyway, maybe it's safe to assume the keys are aligned, so this is not > >> an issue. > > > > Lots of DPDK code ignores alignment issues. > > > >> > >>> + return k1[0] ^ k2[0]; > >>> +} > >> > >> Haven't you implemented "neq" rather than "eq" here? If the keys are > >> equal, the result is 0. Should be != 0. > > > > Not a bug. > > Well, the function body doesn't do what the function name tells it. :) Agree. They really should be renamed to _cmp_neq. And _cmp_eq wrappers could be kept for backwards API compatibility. Possibly marked deprecated. 
> > > These hash compare functions are in fact "neq", not "eq". > > Having "_cmp_eq" in the function names is extremely unfortunate and > misleading. > > > >> > >> Would it be worth adding a comment like "use XOR to make this > >> branch-free"? It may not be obvious to all readers. > >> > >> That said, I’m not sure this trick will actually change the generated > >> object code - especially if the result of the eq function is still used > >> in a conditional afterward. Anyway, keeping it seems like a good > >> conservative approach. > > > > I wonder if any compiler is clever enough to use a different memcmp > implementation if we inform the compiler at build time that we don't care if > key1 is less than or greater key2, only if they differ or not. > > All what is needed is a constant-size length. (Only tested with the most > recent GCC and clang.) Yes. But that was not what I was thinking about... I was wondering if "memcmp()!=0" compiles to code that calls some other memcmp implementation that doesn't check which of the two strings is lower, but only tests if they differ. > > At least GCC will emit a cmp instruction though (so not branch free), if > that matters. > > > If so, the OTHER_BYTES handler shouldn't call memcmp() directly, but a > wrapper around it: > > > > rte_hash_k_cmp_eq(const void *key1, const void *key2, size_t key_len > __rte_unused) > > { > > return memcmp(key1, key2, key_len) != 0; > > } > > > > Should work. (Remove __rte_unused.) I just tested it in Compiler Explorer, and the function calls the same memcmp(), regardless of the !=0. So no benefit compared to calling memcmp() directly from the jump table. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC 3/3] hash: add support for common small key sizes 2025-08-22 7:19 ` Mattias Rönnblom 2025-08-22 9:50 ` Morten Brørup @ 2025-08-22 16:12 ` Stephen Hemminger 1 sibling, 0 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-22 16:12 UTC (permalink / raw) To: Mattias Rönnblom Cc: dev, Morten Brørup, Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin On Fri, 22 Aug 2025 09:19:45 +0200 Mattias Rönnblom <hofors@lysator.liu.se> wrote: > On 2025-08-21 22:35, Stephen Hemminger wrote: > > Add new compare functions for common small key sizes. > > > > Bugzilla ID: 1775 > > Suggested-by: Morten Brørup <mb@smartsharesystems.com> > > Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> > > > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > > --- > > lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 54 insertions(+) > > > > diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c > > index 3212695d92..825889c320 100644 > > --- a/lib/hash/rte_cuckoo_hash.c > > +++ b/lib/hash/rte_cuckoo_hash.c > > @@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); > > */ > > enum cmp_jump_table_case { > > KEY_CUSTOM = 0, > > + KEY_2_BYTES, > > + KEY_4_BYTES, > > + KEY_6_BYTES, > > + KEY_8_BYTES, > > + KEY_12_BYTES, > > KEY_16_BYTES, > > KEY_32_BYTES, > > KEY_48_BYTES, > > @@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) > > } > > #endif > > > > +static inline int > > +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) > > +{ > > + const uint16_t *k1 = key1; > > + const uint16_t *k2 = key2; > > + > > What we do now is to require the keys are 16-bit aligned (which wasn't > the case before). > > You could > > uint16_t k1; > memcpy(&k1, key1, sizeof(uint16_t)); > instead. > > Would generate the same code, but be safe from any future alignment issues. 
> > Anyway, maybe it's safe to assume the keys are aligned, so this is not > an issue. The keys are always in rte_hash_keys which has the key field aligned at uintptr_t. > > > + return k1[0] ^ k2[0]; > > +} > > Haven't you implemented "neq" rather than "eq" here? If the keys are > equal, the result is 0. Should be != 0. The functions use same return value as memcmp() which returns 0 on match. > > Would it be worth adding a comment like "use XOR to make this > branch-free"? It may not be obvious to all readers. > > That said, I’m not sure this trick will actually change the generated > object code - especially if the result of the eq function is still used > in a conditional afterward. Anyway, keeping it seems like a good > conservative approach. Compiler is not smart enough to get past the array of functions to optimize across that. ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 0/4] Cuckoo hash cleanup and optimizations 2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger ` (2 preceding siblings ...) 2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger @ 2025-08-22 18:19 ` Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 1/4] hash: move table of hash compare functions out of header Stephen Hemminger ` (3 more replies) 3 siblings, 4 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-22 18:19 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger Recent discussion around handling small keys motivated further examination of the compare logic. Stephen Hemminger (4): hash: move table of hash compare functions out of header hash: use static_assert hash: reduce architecture special cases hash: add support for common small key sizes app/test/test_hash.c | 4 +- lib/hash/rte_cmp_arm64.h | 56 +-------- lib/hash/rte_cmp_generic.h | 35 ++++++ lib/hash/rte_cmp_x86.h | 60 +--------- lib/hash/rte_cuckoo_hash.c | 238 +++++++++++++++++++++++++++++++++++-- lib/hash/rte_cuckoo_hash.h | 84 +------------ 6 files changed, 275 insertions(+), 202 deletions(-) create mode 100644 lib/hash/rte_cmp_generic.h -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 1/4] hash: move table of hash compare functions out of header 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger @ 2025-08-22 18:19 ` Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 2/4] hash: use static_assert Stephen Hemminger ` (2 subsequent siblings) 3 siblings, 0 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-22 18:19 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Morten Brørup, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Remove the definition of the compare jump table from the header file so the internal details are not exposed. Prevents future ABI breakage if new sizes are added. Make other macros local if possible, header should only contain exposed API. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> Acked-by: Morten Brørup <mb@smartsharesystems.com> --- lib/hash/rte_cuckoo_hash.c | 74 ++++++++++++++++++++++++++++++----- lib/hash/rte_cuckoo_hash.h | 79 +------------------------------------- 2 files changed, 65 insertions(+), 88 deletions(-) diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 2c92c51624..619fe0c691 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -25,14 +25,51 @@ #include <rte_tailq.h> #include "rte_hash.h" +#include "rte_cuckoo_hash.h" -/* needs to be before rte_cuckoo_hash.h */ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); #define RTE_LOGTYPE_HASH hash_logtype #define HASH_LOG(level, ...) 
\ RTE_LOG_LINE(level, HASH, "" __VA_ARGS__) -#include "rte_cuckoo_hash.h" +/* Macro to enable/disable run-time checking of function parameters */ +#if defined(RTE_LIBRTE_HASH_DEBUG) +#define RETURN_IF_TRUE(cond, retval) do { \ + if (cond) \ + return retval; \ +} while (0) +#else +#define RETURN_IF_TRUE(cond, retval) +#endif + +#if defined(RTE_ARCH_X86) +#include "rte_cmp_x86.h" +#endif + +#if defined(RTE_ARCH_ARM64) +#include "rte_cmp_arm64.h" +#endif + +/* + * All different options to select a key compare function, + * based on the key size and custom function. + * Not in rte_cuckoo_hash.h to avoid ABI issues. + */ +enum cmp_jump_table_case { + KEY_CUSTOM = 0, +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + KEY_16_BYTES, + KEY_32_BYTES, + KEY_48_BYTES, + KEY_64_BYTES, + KEY_80_BYTES, + KEY_96_BYTES, + KEY_112_BYTES, + KEY_128_BYTES, +#endif + KEY_OTHER_BYTES, + NUM_KEY_CMP_CASES, +}; /* Enum used to select the implementation of the signature comparison function to use * eg: a system supporting SVE might want to use a NEON or scalar implementation. 
@@ -117,6 +154,25 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) h->rte_hash_custom_cmp_eq = func; } +/* + * Table storing all different key compare functions + * (multi-process supported) + */ +static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { + [KEY_CUSTOM] = NULL, +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + [KEY_16_BYTES] = rte_hash_k16_cmp_eq, + [KEY_32_BYTES] = rte_hash_k32_cmp_eq, + [KEY_48_BYTES] = rte_hash_k48_cmp_eq, + [KEY_64_BYTES] = rte_hash_k64_cmp_eq, + [KEY_80_BYTES] = rte_hash_k80_cmp_eq, + [KEY_96_BYTES] = rte_hash_k96_cmp_eq, + [KEY_112_BYTES] = rte_hash_k112_cmp_eq, + [KEY_128_BYTES] = rte_hash_k128_cmp_eq, +#endif + [KEY_OTHER_BYTES] = memcmp, +}; + static inline int rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h) { @@ -390,13 +446,13 @@ rte_hash_create(const struct rte_hash_parameters *params) goto err_unlock; } -/* - * If x86 architecture is used, select appropriate compare function, - * which may use x86 intrinsics, otherwise use memcmp - */ -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) /* Select function to compare keys */ switch (params->key_len) { +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + /* + * If x86 architecture is used, select appropriate compare function, + * which may use x86 intrinsics, otherwise use memcmp + */ case 16: h->cmp_jump_table_idx = KEY_16_BYTES; break; @@ -421,13 +477,11 @@ rte_hash_create(const struct rte_hash_parameters *params) case 128: h->cmp_jump_table_idx = KEY_128_BYTES; break; +#endif default: /* If key is not multiple of 16, use generic memcmp */ h->cmp_jump_table_idx = KEY_OTHER_BYTES; } -#else - h->cmp_jump_table_idx = KEY_OTHER_BYTES; -#endif if (use_local_cache) { local_free_slots = rte_zmalloc_socket(NULL, diff --git a/lib/hash/rte_cuckoo_hash.h b/lib/hash/rte_cuckoo_hash.h index 26a992419a..16fe999c4c 100644 --- a/lib/hash/rte_cuckoo_hash.h +++ b/lib/hash/rte_cuckoo_hash.h @@ -12,86 +12,9 @@ 
#define _RTE_CUCKOO_HASH_H_ #include <stdalign.h> - -#if defined(RTE_ARCH_X86) -#include "rte_cmp_x86.h" -#endif - -#if defined(RTE_ARCH_ARM64) -#include "rte_cmp_arm64.h" -#endif - -/* Macro to enable/disable run-time checking of function parameters */ -#if defined(RTE_LIBRTE_HASH_DEBUG) -#define RETURN_IF_TRUE(cond, retval) do { \ - if (cond) \ - return retval; \ -} while (0) -#else -#define RETURN_IF_TRUE(cond, retval) -#endif - #include <rte_hash_crc.h> #include <rte_jhash.h> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) -/* - * All different options to select a key compare function, - * based on the key size and custom function. - */ -enum cmp_jump_table_case { - KEY_CUSTOM = 0, - KEY_16_BYTES, - KEY_32_BYTES, - KEY_48_BYTES, - KEY_64_BYTES, - KEY_80_BYTES, - KEY_96_BYTES, - KEY_112_BYTES, - KEY_128_BYTES, - KEY_OTHER_BYTES, - NUM_KEY_CMP_CASES, -}; - -/* - * Table storing all different key compare functions - * (multi-process supported) - */ -const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { - NULL, - rte_hash_k16_cmp_eq, - rte_hash_k32_cmp_eq, - rte_hash_k48_cmp_eq, - rte_hash_k64_cmp_eq, - rte_hash_k80_cmp_eq, - rte_hash_k96_cmp_eq, - rte_hash_k112_cmp_eq, - rte_hash_k128_cmp_eq, - memcmp -}; -#else -/* - * All different options to select a key compare function, - * based on the key size and custom function. - */ -enum cmp_jump_table_case { - KEY_CUSTOM = 0, - KEY_OTHER_BYTES, - NUM_KEY_CMP_CASES, -}; - -/* - * Table storing all different key compare functions - * (multi-process supported) - */ -const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { - NULL, - memcmp -}; - -#endif - - /** * Number of items per bucket. * 8 is a tradeoff between performance and memory consumption. @@ -189,7 +112,7 @@ struct __rte_cache_aligned rte_hash { uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< Custom function used to compare keys. 
*/ - enum cmp_jump_table_case cmp_jump_table_idx; + unsigned int cmp_jump_table_idx; /**< Indicates which compare function to use. */ unsigned int sig_cmp_fn; /**< Indicates which signature compare function to use. */ -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 2/4] hash: use static_assert 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 1/4] hash: move table of hash compare functions out of header Stephen Hemminger @ 2025-08-22 18:19 ` Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 3/4] hash: reduce architecture special cases Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 4/4] hash: add support for common small key sizes Stephen Hemminger 3 siblings, 0 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-22 18:19 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Now that static_assert is available, better than pre-processor use of #if condition. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/hash/rte_cuckoo_hash.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/hash/rte_cuckoo_hash.h b/lib/hash/rte_cuckoo_hash.h index 16fe999c4c..90fda7d7e0 100644 --- a/lib/hash/rte_cuckoo_hash.h +++ b/lib/hash/rte_cuckoo_hash.h @@ -24,9 +24,8 @@ */ #define RTE_HASH_BUCKET_ENTRIES 8 -#if !RTE_IS_POWER_OF_2(RTE_HASH_BUCKET_ENTRIES) -#error RTE_HASH_BUCKET_ENTRIES must be a power of 2 -#endif +static_assert(RTE_IS_POWER_OF_2(RTE_HASH_BUCKET_ENTRIES), + "RTE_HASH_BUCKET_ENTRIES must be a power of 2"); #define NULL_SIGNATURE 0 -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 3/4] hash: reduce architecture special cases 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 1/4] hash: move table of hash compare functions out of header Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 2/4] hash: use static_assert Stephen Hemminger @ 2025-08-22 18:19 ` Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 4/4] hash: add support for common small key sizes Stephen Hemminger 3 siblings, 0 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-22 18:19 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Wathsala Vithanage, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Make comparison of sizes compatible across platforms. Keep the special case code for 16 bytes for x86 and arm64 but also add simple xor for others. Need to keep rte_hash_k32_cmp_eq() exposed because ip_frag code poaches it. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/hash/rte_cmp_arm64.h | 56 +------------------------ lib/hash/rte_cmp_generic.h | 35 ++++++++++++++++ lib/hash/rte_cmp_x86.h | 60 ++------------------------ lib/hash/rte_cuckoo_hash.c | 86 +++++++++++++++++++++++++++++++++----- 4 files changed, 116 insertions(+), 121 deletions(-) create mode 100644 lib/hash/rte_cmp_generic.h diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h index a3e85635eb..2b2a37ebd2 100644 --- a/lib/hash/rte_cmp_arm64.h +++ b/lib/hash/rte_cmp_arm64.h @@ -2,7 +2,7 @@ * Copyright(c) 2015 Cavium, Inc */ -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ +/* Functions to compare multiple of 16 byte keys */ static inline int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) @@ -27,59 +27,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, static inline int rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) { - return rte_hash_k16_cmp_eq(key1, key2, key_len) || + return 
rte_hash_k16_cmp_eq(key1, key2, key_len) | rte_hash_k16_cmp_eq((const char *) key1 + 16, (const char *) key2 + 16, key_len); } - -static inline int -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k32_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 96, - (const char *) key2 + 96, key_len); -} - -static inline int -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k64_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} diff --git a/lib/hash/rte_cmp_generic.h b/lib/hash/rte_cmp_generic.h new file mode 100644 index 0000000000..f846d562e3 --- /dev/null +++ b/lib/hash/rte_cmp_generic.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2025 Stephen 
Hemminger + */ + +#ifndef _RTE_CMP_GENERIC_H_ +#define _RTE_CMP_GENERIC_H_ + +/* Function to compare 16 byte keys */ +static inline int +rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ +#ifdef RTE_ARCH_64 + const uint64_t *k1 = key1; + const unaligned_uint64_t *k2 = key2; + + return ((k1[0] ^ k2[0]) | (k1[1] ^ k2[1])) != 0; +#else + const uint32_t *k1 = key1; + const unaligned_uint32_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | + (k1[2] ^ k2[2]) | (k1[3] ^ k2[3]); +#endif +} + +/* Function to compare 32 byte keys */ +static inline int +rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) | + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16, + (const uint8_t *) key2 + 16, key_len); +} + +#endif diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h index ddfbef462f..e7a38c8fcd 100644 --- a/lib/hash/rte_cmp_x86.h +++ b/lib/hash/rte_cmp_x86.h @@ -4,7 +4,7 @@ #include <rte_vect.h> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ +/* Function to compare multiple of 16 byte keys */ static inline int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) { @@ -18,59 +18,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unu static inline int rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) { - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len); -} - -static inline int -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t 
key_len) -{ - return rte_hash_k32_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static inline int -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static inline int -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 96, - (const char *) key2 + 96, key_len); -} - -static inline int -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k64_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); + return rte_hash_k16_cmp_eq(key1, key2, key_len) | + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16, + (const uint8_t *) key2 + 16, key_len); } diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 619fe0c691..199cb62bf0 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -42,13 +42,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); #define RETURN_IF_TRUE(cond, retval) #endif -#if defined(RTE_ARCH_X86) -#include "rte_cmp_x86.h" -#endif - -#if defined(RTE_ARCH_ARM64) -#include "rte_cmp_arm64.h" -#endif /* * All different options to select a key compare function, @@ -57,7 +50,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); */ enum cmp_jump_table_case { KEY_CUSTOM = 0, -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) KEY_16_BYTES, 
KEY_32_BYTES, KEY_48_BYTES, @@ -66,11 +58,85 @@ enum cmp_jump_table_case { KEY_96_BYTES, KEY_112_BYTES, KEY_128_BYTES, -#endif KEY_OTHER_BYTES, NUM_KEY_CMP_CASES, }; +/* + * Comparison functions for different key sizes. + * Each function is only called with a specific fixed key size. + * + * Return value is 0 on equality to allow direct use of memcmp. + * Recommend using XOR and | operator to avoid branching + * as long as key is smaller than cache line size. + * + * Key1 always points to key[] in rte_hash_key which is aligned. + * Key2 is parameter to insert which might not be. + * + * Special case for 16 and 32 bytes to allow for architecture + * specific optimizations. + */ + +#if defined(RTE_ARCH_X86) +#include "rte_cmp_x86.h" +#elif defined(RTE_ARCH_ARM64) +#include "rte_cmp_arm64.h" +#else +#include "rte_cmp_generic.h" +#endif + +static int +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) | + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16, + (const uint8_t *) key2 + 16, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 32, + (const uint8_t *) key2 + 32, key_len); +} + +static int +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k32_cmp_eq(key1, key2, key_len) | + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 32, + (const uint8_t *) key2 + 32, key_len); +} + +static int +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len); +} + +static int +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len); +} + +static int +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return 
rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len) || + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 96, + (const uint8_t *) key2 + 96, key_len); +} + +static int +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k64_cmp_eq((const uint8_t *) key1 + 64, + (const uint8_t *) key2 + 64, key_len); +} + /* Enum used to select the implementation of the signature comparison function to use * eg: a system supporting SVE might want to use a NEON or scalar implementation. */ @@ -160,7 +226,6 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) */ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { [KEY_CUSTOM] = NULL, -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) [KEY_16_BYTES] = rte_hash_k16_cmp_eq, [KEY_32_BYTES] = rte_hash_k32_cmp_eq, [KEY_48_BYTES] = rte_hash_k48_cmp_eq, @@ -169,7 +234,6 @@ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { [KEY_96_BYTES] = rte_hash_k96_cmp_eq, [KEY_112_BYTES] = rte_hash_k112_cmp_eq, [KEY_128_BYTES] = rte_hash_k128_cmp_eq, -#endif [KEY_OTHER_BYTES] = memcmp, }; -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 4/4] hash: add support for common small key sizes 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger ` (2 preceding siblings ...) 2025-08-22 18:19 ` [PATCH v2 3/4] hash: reduce architecture special cases Stephen Hemminger @ 2025-08-22 18:19 ` Stephen Hemminger 3 siblings, 0 replies; 16+ messages in thread From: Stephen Hemminger @ 2025-08-22 18:19 UTC (permalink / raw) To: dev Cc: Stephen Hemminger, Morten Brørup, Mattias Rönnblom, Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin Add new compare functions for common small key sizes. Bugzilla ID: 1775 Suggested-by: Morten Brørup <mb@smartsharesystems.com> Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_hash.c | 4 +- lib/hash/rte_cuckoo_hash.c | 100 +++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/app/test/test_hash.c b/app/test/test_hash.c index 5791fd7f4c..24588a4a96 100644 --- a/app/test/test_hash.c +++ b/app/test/test_hash.c @@ -35,7 +35,9 @@ */ static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc}; static uint32_t hashtest_initvals[] = {0}; -static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64}; +static uint32_t hashtest_key_lens[] = { + 0, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 14, 15, 16, 21, 31, 32, 33, 63, 64 +}; #define MAX_KEYSIZE 64 /******************************************************************************/ #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15) diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 199cb62bf0..04a2623cba 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -50,6 +50,15 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO); */ enum cmp_jump_table_case { KEY_CUSTOM = 0, + KEY_2_BYTES, + KEY_3_BYTES, + KEY_4_BYTES, + KEY_5_BYTES, + KEY_6_BYTES, + KEY_8_BYTES, + KEY_10_BYTES, + 
KEY_12_BYTES, + KEY_14_BYTES, KEY_16_BYTES, KEY_32_BYTES, KEY_48_BYTES, @@ -85,6 +94,88 @@ enum cmp_jump_table_case { #include "rte_cmp_generic.h" #endif +static inline int +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint16_t *k1 = key1; + const unaligned_uint16_t *k2 = key2; + + return k1[0] ^ k2[0]; +} + +static int +rte_hash_k3_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k2_cmp_eq(key1, key2, key_len) + | (((const uint8_t *)key1)[2] ^ ((const uint8_t *)key2)[2]); +} + +static int +rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint32_t *k1 = key1; + const unaligned_uint32_t *k2 = key2; + + return k1[0] ^ k2[0]; +} + +static int +rte_hash_k5_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k4_cmp_eq(key1, key2, key_len) | + (((const uint8_t *)key1)[4] ^ ((const uint8_t *)key2)[4]); +} + +static int +rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint16_t *k1 = key1; + const unaligned_uint16_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]); +} + +static int +rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ +#ifdef RTE_ARCH_64 + const uint64_t *k1 = key1; + const unaligned_uint64_t *k2 = key2; + + return !!(k1[0] ^ k2[0]); +#else + const uint32_t *k1 = key1; + const unaligned_uint32_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]); +#endif +} + +static int +rte_hash_k10_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k8_cmp_eq(key1, key2, key_len) | + rte_hash_k2_cmp_eq((const uint8_t *)key1 + 8, + (const uint8_t *)key2 + 8, key_len); +} + +static int +rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint32_t *k1 = key1; + const unaligned_uint32_t *k2 = key2; + + return (k1[0] ^ k2[0]) | (k1[1] ^ 
k2[1]) | (k1[2] ^ k2[2]); +} + +static int +rte_hash_k14_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + return rte_hash_k8_cmp_eq(key1, key2, key_len) | + rte_hash_k6_cmp_eq((const uint8_t *)key1 + 8, + (const uint8_t *)key2 + 8, key_len); +} + static int rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) { @@ -226,6 +317,15 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) */ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { [KEY_CUSTOM] = NULL, + [KEY_2_BYTES] = rte_hash_k2_cmp_eq, + [KEY_3_BYTES] = rte_hash_k3_cmp_eq, + [KEY_4_BYTES] = rte_hash_k4_cmp_eq, + [KEY_5_BYTES] = rte_hash_k5_cmp_eq, + [KEY_6_BYTES] = rte_hash_k6_cmp_eq, + [KEY_8_BYTES] = rte_hash_k8_cmp_eq, + [KEY_10_BYTES] = rte_hash_k10_cmp_eq, + [KEY_12_BYTES] = rte_hash_k12_cmp_eq, + [KEY_14_BYTES] = rte_hash_k14_cmp_eq, [KEY_16_BYTES] = rte_hash_k16_cmp_eq, [KEY_32_BYTES] = rte_hash_k32_cmp_eq, [KEY_48_BYTES] = rte_hash_k48_cmp_eq, -- 2.47.2 ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2025-08-22 18:57 UTC | newest] Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger 2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger 2025-08-22 9:05 ` Morten Brørup 2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger 2025-08-22 9:20 ` Morten Brørup 2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger 2025-08-22 7:19 ` Mattias Rönnblom 2025-08-22 9:50 ` Morten Brørup 2025-08-22 15:05 ` Mattias Rönnblom 2025-08-22 18:57 ` Morten Brørup 2025-08-22 16:12 ` Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 1/4] hash: move table of hash compare functions out of header Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 2/4] hash: use static_assert Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 3/4] hash: reduce architecture special cases Stephen Hemminger 2025-08-22 18:19 ` [PATCH v2 4/4] hash: add support for common small key sizes Stephen Hemminger
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).