DPDK patches and discussions
 help / color / mirror / Atom feed
* [RFC 0/3] hash: optimize compare logic
@ 2025-08-21 20:35 Stephen Hemminger
  2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Recent discussion around handling small keys motivated furthur
examination of the compare logic.

Stephen Hemminger (3):
  hash: move table of hash compare functions out of header
  hash: reduce architecture special cases
  hash: add support for common small key sizes

 lib/hash/rte_cmp_arm64.h   |  52 ----------
 lib/hash/rte_cmp_x86.h     |  56 +----------
 lib/hash/rte_cuckoo_hash.c | 194 +++++++++++++++++++++++++++++++++++--
 lib/hash/rte_cuckoo_hash.h |  79 +--------------
 4 files changed, 187 insertions(+), 194 deletions(-)

-- 
2.47.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC 1/3] hash: move table of hash compare functions out of header
  2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger
@ 2025-08-21 20:35 ` Stephen Hemminger
  2025-08-22  9:05   ` Morten Brørup
  2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger
  2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger
  2 siblings, 1 reply; 7+ messages in thread
From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw)
  To: dev
  Cc: Stephen Hemminger, Yipeng Wang, Sameh Gobriel, Bruce Richardson,
	Vladimir Medvedkin

Remove the definition of the compare jump table from the
header file so the internal details are not exposed.
Prevents future ABI breakage if new sizes are added.

Make other macros local if possible, header should
only contain exposed API.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/hash/rte_cuckoo_hash.c | 74 ++++++++++++++++++++++++++++++-----
 lib/hash/rte_cuckoo_hash.h | 79 +-------------------------------------
 2 files changed, 65 insertions(+), 88 deletions(-)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 2c92c51624..619fe0c691 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -25,14 +25,51 @@
 #include <rte_tailq.h>
 
 #include "rte_hash.h"
+#include "rte_cuckoo_hash.h"
 
-/* needs to be before rte_cuckoo_hash.h */
 RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
 #define RTE_LOGTYPE_HASH hash_logtype
 #define HASH_LOG(level, ...) \
 	RTE_LOG_LINE(level, HASH, "" __VA_ARGS__)
 
-#include "rte_cuckoo_hash.h"
+/* Macro to enable/disable run-time checking of function parameters */
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define RETURN_IF_TRUE(cond, retval) do { \
+	if (cond) \
+		return retval; \
+} while (0)
+#else
+#define RETURN_IF_TRUE(cond, retval)
+#endif
+
+#if defined(RTE_ARCH_X86)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
+
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ * Not in rte_cuckoo_hash.h to avoid ABI issues.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+	KEY_16_BYTES,
+	KEY_32_BYTES,
+	KEY_48_BYTES,
+	KEY_64_BYTES,
+	KEY_80_BYTES,
+	KEY_96_BYTES,
+	KEY_112_BYTES,
+	KEY_128_BYTES,
+#endif
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
 
 /* Enum used to select the implementation of the signature comparison function to use
  * eg: a system supporting SVE might want to use a NEON or scalar implementation.
@@ -117,6 +154,25 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
 	h->rte_hash_custom_cmp_eq = func;
 }
 
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	[KEY_CUSTOM] = NULL,
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
+	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
+	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,
+	[KEY_64_BYTES] = rte_hash_k64_cmp_eq,
+	[KEY_80_BYTES] = rte_hash_k80_cmp_eq,
+	[KEY_96_BYTES] = rte_hash_k96_cmp_eq,
+	[KEY_112_BYTES] = rte_hash_k112_cmp_eq,
+	[KEY_128_BYTES] = rte_hash_k128_cmp_eq,
+#endif
+	[KEY_OTHER_BYTES] = memcmp,
+};
+
 static inline int
 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
 {
@@ -390,13 +446,13 @@ rte_hash_create(const struct rte_hash_parameters *params)
 		goto err_unlock;
 	}
 
-/*
- * If x86 architecture is used, select appropriate compare function,
- * which may use x86 intrinsics, otherwise use memcmp
- */
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 	/* Select function to compare keys */
 	switch (params->key_len) {
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+	/*
+	 * If x86 architecture is used, select appropriate compare function,
+	 * which may use x86 intrinsics, otherwise use memcmp
+	 */
 	case 16:
 		h->cmp_jump_table_idx = KEY_16_BYTES;
 		break;
@@ -421,13 +477,11 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	case 128:
 		h->cmp_jump_table_idx = KEY_128_BYTES;
 		break;
+#endif
 	default:
 		/* If key is not multiple of 16, use generic memcmp */
 		h->cmp_jump_table_idx = KEY_OTHER_BYTES;
 	}
-#else
-	h->cmp_jump_table_idx = KEY_OTHER_BYTES;
-#endif
 
 	if (use_local_cache) {
 		local_free_slots = rte_zmalloc_socket(NULL,
diff --git a/lib/hash/rte_cuckoo_hash.h b/lib/hash/rte_cuckoo_hash.h
index 26a992419a..16fe999c4c 100644
--- a/lib/hash/rte_cuckoo_hash.h
+++ b/lib/hash/rte_cuckoo_hash.h
@@ -12,86 +12,9 @@
 #define _RTE_CUCKOO_HASH_H_
 
 #include <stdalign.h>
-
-#if defined(RTE_ARCH_X86)
-#include "rte_cmp_x86.h"
-#endif
-
-#if defined(RTE_ARCH_ARM64)
-#include "rte_cmp_arm64.h"
-#endif
-
-/* Macro to enable/disable run-time checking of function parameters */
-#if defined(RTE_LIBRTE_HASH_DEBUG)
-#define RETURN_IF_TRUE(cond, retval) do { \
-	if (cond) \
-		return retval; \
-} while (0)
-#else
-#define RETURN_IF_TRUE(cond, retval)
-#endif
-
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-	KEY_CUSTOM = 0,
-	KEY_16_BYTES,
-	KEY_32_BYTES,
-	KEY_48_BYTES,
-	KEY_64_BYTES,
-	KEY_80_BYTES,
-	KEY_96_BYTES,
-	KEY_112_BYTES,
-	KEY_128_BYTES,
-	KEY_OTHER_BYTES,
-	NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-	NULL,
-	rte_hash_k16_cmp_eq,
-	rte_hash_k32_cmp_eq,
-	rte_hash_k48_cmp_eq,
-	rte_hash_k64_cmp_eq,
-	rte_hash_k80_cmp_eq,
-	rte_hash_k96_cmp_eq,
-	rte_hash_k112_cmp_eq,
-	rte_hash_k128_cmp_eq,
-	memcmp
-};
-#else
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-	KEY_CUSTOM = 0,
-	KEY_OTHER_BYTES,
-	NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-	NULL,
-	memcmp
-};
-
-#endif
-
-
 /**
  * Number of items per bucket.
  * 8 is a tradeoff between performance and memory consumption.
@@ -189,7 +112,7 @@ struct __rte_cache_aligned rte_hash {
 	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
 	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
 	/**< Custom function used to compare keys. */
-	enum cmp_jump_table_case cmp_jump_table_idx;
+	unsigned int cmp_jump_table_idx;
 	/**< Indicates which compare function to use. */
 	unsigned int sig_cmp_fn;
 	/**< Indicates which signature compare function to use. */
-- 
2.47.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC 2/3] hash: reduce architecture special cases
  2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger
  2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger
@ 2025-08-21 20:35 ` Stephen Hemminger
  2025-08-22  9:20   ` Morten Brørup
  2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger
  2 siblings, 1 reply; 7+ messages in thread
From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw)
  To: dev
  Cc: Stephen Hemminger, Wathsala Vithanage, Yipeng Wang,
	Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin

Make comparison of power of 2 sizes compatible across platforms.
Keep the special case code for 16 bytes for x86 and arm64 but
also add simple xor for others.

Need to make rte_hash_k32_cmp_eq() exposed because ip_frag
code poaches it.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/hash/rte_cmp_arm64.h   | 52 ----------------------
 lib/hash/rte_cmp_x86.h     | 56 +-----------------------
 lib/hash/rte_cuckoo_hash.c | 90 +++++++++++++++++++++++++++++++++-----
 3 files changed, 80 insertions(+), 118 deletions(-)

diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h
index a3e85635eb..c7c56526d0 100644
--- a/lib/hash/rte_cmp_arm64.h
+++ b/lib/hash/rte_cmp_arm64.h
@@ -31,55 +31,3 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
 		rte_hash_k16_cmp_eq((const char *) key1 + 16,
 				(const char *) key2 + 16, key_len);
 }
-
-static inline int
-rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 16,
-				(const char *) key2 + 16, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 32,
-				(const char *) key2 + 32, key_len);
-}
-
-static inline int
-rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k32_cmp_eq((const char *) key1 + 32,
-				(const char *) key2 + 32, key_len);
-}
-
-static inline int
-rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len);
-}
-
-static inline int
-rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k32_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len);
-}
-
-static inline int
-rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k32_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 96,
-				(const char *) key2 + 96, key_len);
-}
-
-static inline int
-rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k64_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len);
-}
diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h
index ddfbef462f..62ac320b25 100644
--- a/lib/hash/rte_cmp_x86.h
+++ b/lib/hash/rte_cmp_x86.h
@@ -19,58 +19,6 @@ static inline int
 rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
 {
 	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 16,
-				(const char *) key2 + 16, key_len);
-}
-
-static inline int
-rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 16,
-				(const char *) key2 + 16, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 32,
-				(const char *) key2 + 32, key_len);
-}
-
-static inline int
-rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k32_cmp_eq((const char *) key1 + 32,
-				(const char *) key2 + 32, key_len);
-}
-
-static inline int
-rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len);
-}
-
-static inline int
-rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k32_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len);
-}
-
-static inline int
-rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k32_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len) ||
-		rte_hash_k16_cmp_eq((const char *) key1 + 96,
-				(const char *) key2 + 96, key_len);
-}
-
-static inline int
-rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
-	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
-		rte_hash_k64_cmp_eq((const char *) key1 + 64,
-				(const char *) key2 + 64, key_len);
+		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
+				(const uint8_t *) key2 + 16, key_len);
 }
diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 619fe0c691..3212695d92 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -42,14 +42,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
 #define RETURN_IF_TRUE(cond, retval)
 #endif
 
-#if defined(RTE_ARCH_X86)
-#include "rte_cmp_x86.h"
-#endif
-
-#if defined(RTE_ARCH_ARM64)
-#include "rte_cmp_arm64.h"
-#endif
-
 /*
  * All different options to select a key compare function,
  * based on the key size and custom function.
@@ -57,7 +49,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
  */
 enum cmp_jump_table_case {
 	KEY_CUSTOM = 0,
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 	KEY_16_BYTES,
 	KEY_32_BYTES,
 	KEY_48_BYTES,
@@ -66,11 +57,88 @@ enum cmp_jump_table_case {
 	KEY_96_BYTES,
 	KEY_112_BYTES,
 	KEY_128_BYTES,
-#endif
 	KEY_OTHER_BYTES,
 	NUM_KEY_CMP_CASES,
 };
 
+
+#if defined(RTE_ARCH_X86)
+#include "rte_cmp_x86.h"
+#elif defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#else
+
+static inline int
+rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint64_t *k1 = key1;
+	const uint64_t *k2 = key2;
+
+	return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]);
+}
+
+static inline int
+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
+				(const uint8_t *) key2 + 16, key_len);
+}
+#endif
+
+
+static inline int
+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
+				(const uint8_t *) key2 + 16, key_len) ||
+		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 32,
+				(const uint8_t *) key2 + 32, key_len);
+}
+
+static inline int
+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 32,
+				(const uint8_t *) key2 + 32, key_len);
+}
+
+static inline int
+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 64,
+				(const uint8_t *) key2 + 64, key_len);
+}
+
+static inline int
+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
+				(const uint8_t *) key2 + 64, key_len);
+}
+
+static inline int
+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
+				(const uint8_t *) key2 + 64, key_len) ||
+		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 96,
+				(const uint8_t *) key2 + 96, key_len);
+}
+
+static inline int
+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k64_cmp_eq((const uint8_t *) key1 + 64,
+				(const uint8_t *) key2 + 64, key_len);
+}
+
 /* Enum used to select the implementation of the signature comparison function to use
  * eg: a system supporting SVE might want to use a NEON or scalar implementation.
  */
@@ -160,7 +228,6 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
  */
 static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
 	[KEY_CUSTOM] = NULL,
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
 	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
 	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,
@@ -169,7 +236,6 @@ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
 	[KEY_96_BYTES] = rte_hash_k96_cmp_eq,
 	[KEY_112_BYTES] = rte_hash_k112_cmp_eq,
 	[KEY_128_BYTES] = rte_hash_k128_cmp_eq,
-#endif
 	[KEY_OTHER_BYTES] = memcmp,
 };
 
-- 
2.47.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC 3/3] hash: add support for common small key sizes
  2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger
  2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger
  2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger
@ 2025-08-21 20:35 ` Stephen Hemminger
  2025-08-22  7:19   ` Mattias Rönnblom
  2 siblings, 1 reply; 7+ messages in thread
From: Stephen Hemminger @ 2025-08-21 20:35 UTC (permalink / raw)
  To: dev
  Cc: Stephen Hemminger, Morten Brørup, Mattias Rönnblom,
	Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin

Add new compare functions for common small key sizes.

Bugzilla ID: 1775
Suggested-by: Morten Brørup <mb@smartsharesystems.com>
Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 3212695d92..825889c320 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
  */
 enum cmp_jump_table_case {
 	KEY_CUSTOM = 0,
+	KEY_2_BYTES,
+	KEY_4_BYTES,
+	KEY_6_BYTES,
+	KEY_8_BYTES,
+	KEY_12_BYTES,
 	KEY_16_BYTES,
 	KEY_32_BYTES,
 	KEY_48_BYTES,
@@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
 }
 #endif
 
+static inline int
+rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint16_t *k1 = key1;
+	const uint16_t *k2 = key2;
+
+	return k1[0] ^ k2[0];
+}
+
+static inline int
+rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint32_t *k1 = key1;
+	const uint32_t *k2 = key2;
+
+	return k1[0] ^ k2[0];
+}
+
+static inline int
+rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint16_t *k1 = key1;
+	const uint16_t *k2 = key2;
+
+	return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]);
+}
+
+static inline int
+rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint64_t *k1 = key1;
+	const uint64_t *k2 = key2;
+
+	return k1[0] ^ k2[0];
+}
+
+static inline int
+rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint32_t *k1 = key1;
+	const uint32_t *k2 = key2;
+
+	return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]);
+}
 
 static inline int
 rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
@@ -228,6 +277,11 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
  */
 static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
 	[KEY_CUSTOM] = NULL,
+	[KEY_2_BYTES] = rte_hash_k2_cmp_eq,
+	[KEY_4_BYTES] = rte_hash_k4_cmp_eq,
+	[KEY_6_BYTES] = rte_hash_k6_cmp_eq,
+	[KEY_8_BYTES] = rte_hash_k8_cmp_eq,
+	[KEY_12_BYTES] = rte_hash_k12_cmp_eq,
 	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
 	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
 	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,
-- 
2.47.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 3/3] hash: add support for common small key sizes
  2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger
@ 2025-08-22  7:19   ` Mattias Rönnblom
  0 siblings, 0 replies; 7+ messages in thread
From: Mattias Rönnblom @ 2025-08-22  7:19 UTC (permalink / raw)
  To: Stephen Hemminger, dev
  Cc: Morten Brørup, Mattias Rönnblom, Yipeng Wang,
	Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin

On 2025-08-21 22:35, Stephen Hemminger wrote:
> Add new compare functions for common small key sizes.
> 
> Bugzilla ID: 1775
> Suggested-by: Morten Brørup <mb@smartsharesystems.com>
> Reported-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>   lib/hash/rte_cuckoo_hash.c | 54 ++++++++++++++++++++++++++++++++++++++
>   1 file changed, 54 insertions(+)
> 
> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> index 3212695d92..825889c320 100644
> --- a/lib/hash/rte_cuckoo_hash.c
> +++ b/lib/hash/rte_cuckoo_hash.c
> @@ -49,6 +49,11 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
>    */
>   enum cmp_jump_table_case {
>   	KEY_CUSTOM = 0,
> +	KEY_2_BYTES,
> +	KEY_4_BYTES,
> +	KEY_6_BYTES,
> +	KEY_8_BYTES,
> +	KEY_12_BYTES,
>   	KEY_16_BYTES,
>   	KEY_32_BYTES,
>   	KEY_48_BYTES,
> @@ -86,6 +91,50 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>   }
>   #endif
>   
> +static inline int
> +rte_hash_k2_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> +{
> +	const uint16_t *k1 = key1;
> +	const uint16_t *k2 = key2;
> +

What we do now is to require the keys are 16-bit aligned (which wasn't 
the case before).

You could

uint16_t k1;
memcpy(&k1, key1, sizeof(uint16_t));
instead.

Would generate the same code, but be safe from any future alignment issues.

Anyway, maybe it's safe to assume the keys are aligned, so this is not 
an issue.

> +	return k1[0] ^ k2[0];
> +}

Haven't you implemented "neq" rather than "eq" here? If the keys are 
equal, the result is 0. Should be != 0.

Would it be worth adding a comment like "use XOR to make this 
branch-free"? It may not be obvious to all readers.

That said, I’m not sure this trick will actually change the generated 
object code - especially if the result of the eq function is still used 
in a conditional afterward. Anyway, keeping it seems like a good 
conservative approach.

> +
> +static inline int
> +rte_hash_k4_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> +{
> +	const uint32_t *k1 = key1;
> +	const uint32_t *k2 = key2;
> +
> +	return k1[0] ^ k2[0];
> +}
> +
> +static inline int
> +rte_hash_k6_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> +{
> +	const uint16_t *k1 = key1;
> +	const uint16_t *k2 = key2;
> +
> +	return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]);
> +}
> +
> +static inline int
> +rte_hash_k8_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> +{
> +	const uint64_t *k1 = key1;
> +	const uint64_t *k2 = key2;
> +
> +	return k1[0] ^ k2[0];
> +}
> +
> +static inline int
> +rte_hash_k12_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> +{
> +	const uint32_t *k1 = key1;
> +	const uint32_t *k2 = key2;
> +
> +	return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) | (k1[2] ^ k2[2]);
> +}
>   
>   static inline int
>   rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> @@ -228,6 +277,11 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
>    */
>   static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
>   	[KEY_CUSTOM] = NULL,
> +	[KEY_2_BYTES] = rte_hash_k2_cmp_eq,
> +	[KEY_4_BYTES] = rte_hash_k4_cmp_eq,
> +	[KEY_6_BYTES] = rte_hash_k6_cmp_eq,
> +	[KEY_8_BYTES] = rte_hash_k8_cmp_eq,
> +	[KEY_12_BYTES] = rte_hash_k12_cmp_eq,
>   	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
>   	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
>   	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [RFC 1/3] hash: move table of hash compare functions out of header
  2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger
@ 2025-08-22  9:05   ` Morten Brørup
  0 siblings, 0 replies; 7+ messages in thread
From: Morten Brørup @ 2025-08-22  9:05 UTC (permalink / raw)
  To: Stephen Hemminger, dev
  Cc: Yipeng Wang, Sameh Gobriel, Bruce Richardson, Vladimir Medvedkin

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, 21 August 2025 22.35
> 
> Remove the definition of the compare jump table from the
> header file so the internal details are not exposed.
> Prevents future ABI breakage if new sizes are added.
> 
> Make other macros local if possible, header should
> only contain exposed API.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---

[...]

> +/*
> + * All different options to select a key compare function,
> + * based on the key size and custom function.
> + * Not in rte_cuckoo_hash.h to avoid ABI issues.
> + */
> +enum cmp_jump_table_case {
> +	KEY_CUSTOM = 0,
> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> +	KEY_16_BYTES,
> +	KEY_32_BYTES,
> +	KEY_48_BYTES,
> +	KEY_64_BYTES,
> +	KEY_80_BYTES,
> +	KEY_96_BYTES,
> +	KEY_112_BYTES,
> +	KEY_128_BYTES,
> +#endif
> +	KEY_OTHER_BYTES,
> +	NUM_KEY_CMP_CASES,
> +};
> 
>  /* Enum used to select the implementation of the signature comparison
> function to use
>   * eg: a system supporting SVE might want to use a NEON or scalar
> implementation.
> @@ -117,6 +154,25 @@ void rte_hash_set_cmp_func(struct rte_hash *h,
> rte_hash_cmp_eq_t func)
>  	h->rte_hash_custom_cmp_eq = func;
>  }
> 
> +/*
> + * Table storing all different key compare functions
> + * (multi-process supported)
> + */
> +static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
> +	[KEY_CUSTOM] = NULL,
> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> +	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
> +	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
> +	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,
> +	[KEY_64_BYTES] = rte_hash_k64_cmp_eq,
> +	[KEY_80_BYTES] = rte_hash_k80_cmp_eq,
> +	[KEY_96_BYTES] = rte_hash_k96_cmp_eq,
> +	[KEY_112_BYTES] = rte_hash_k112_cmp_eq,
> +	[KEY_128_BYTES] = rte_hash_k128_cmp_eq,
> +#endif
> +	[KEY_OTHER_BYTES] = memcmp,
> +};

Nice trick explicitly indexing these here; it reduces the risk of not matching the cmp_jump_table_case.

Consider adding static_assert() that RTE_DIM(cmp_jump_table) == NUM_KEY_CMP_CASES.

With or without suggested static_assert()...
Acked-by: Morten Brørup <mb@smartsharesystems.com>

Good cleanup!


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [RFC 2/3] hash: reduce architecture special cases
  2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger
@ 2025-08-22  9:20   ` Morten Brørup
  0 siblings, 0 replies; 7+ messages in thread
From: Morten Brørup @ 2025-08-22  9:20 UTC (permalink / raw)
  To: Stephen Hemminger, dev
  Cc: Wathsala Vithanage, Yipeng Wang, Sameh Gobriel, Bruce Richardson,
	Vladimir Medvedkin, Mattias Rönnblom

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, 21 August 2025 22.35
> 
> Make comparison of power of 2 sizes compatible across platforms.
> Keep the special case code for 16 bytes for x86 and arm64 but
> also add simple xor for others.
> 
> Need to make rte_hash_k32_cmp_eq() exposed because ip_frag
> code poaches it.

One more argument why these architecture optimized compare functions should be exposed as generic EAL functions, like rte_mov16() etc. for rte_memcpy(), instead of hiding them away in the hash library.

It could be rte_memeq16(const void *, const void *) etc. in /lib/eal/include, with architecture optimized variants, and a generic variant.

And a wrapper calling them in the hash library, such as:

static inline int
rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
{
	return !rte_memeq16(key1, key2);
}

> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---

With our without suggested feature creep...

Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-08-22  9:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger
2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger
2025-08-22  9:05   ` Morten Brørup
2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger
2025-08-22  9:20   ` Morten Brørup
2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger
2025-08-22  7:19   ` Mattias Rönnblom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).