DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Mattias Rönnblom" <hofors@lysator.liu.se>
To: Stephen Hemminger <stephen@networkplumber.org>, dev@dpdk.org
Cc: Wathsala Vithanage <wathsala.vithanage@arm.com>,
	Yipeng Wang <yipeng1.wang@intel.com>,
	Sameh Gobriel <sameh.gobriel@intel.com>,
	Bruce Richardson <bruce.richardson@intel.com>,
	Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Subject: Re: [PATCH v2 3/4] hash: reduce architecture special cases
Date: Tue, 26 Aug 2025 16:36:38 +0200	[thread overview]
Message-ID: <05cd927d-e4e9-441f-b247-0bdc138c8de0@lysator.liu.se> (raw)
In-Reply-To: <20250822182110.27599-4-stephen@networkplumber.org>

On 2025-08-22 20:19, Stephen Hemminger wrote:
> Make comparison of sizes compatible across platforms.
> Keep the special case code for 16 bytes for x86 and arm64 but
> also add simple xor for others.
> 
> Need to keep rte_hash_k32_cmp_eq() exposed because ip_frag
> code poaches it.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>   lib/hash/rte_cmp_arm64.h   | 56 +------------------------
>   lib/hash/rte_cmp_generic.h | 35 ++++++++++++++++
>   lib/hash/rte_cmp_x86.h     | 60 ++------------------------
>   lib/hash/rte_cuckoo_hash.c | 86 +++++++++++++++++++++++++++++++++-----
>   4 files changed, 116 insertions(+), 121 deletions(-)
>   create mode 100644 lib/hash/rte_cmp_generic.h
> 
> diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h
> index a3e85635eb..2b2a37ebd2 100644
> --- a/lib/hash/rte_cmp_arm64.h
> +++ b/lib/hash/rte_cmp_arm64.h
> @@ -2,7 +2,7 @@
>    * Copyright(c) 2015 Cavium, Inc
>    */
>   
> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> +/* Functions to compare multiple of 16 byte keys */
>   static inline int
>   rte_hash_k16_cmp_eq(const void *key1, const void *key2,
>   		    size_t key_len __rte_unused)
> @@ -27,59 +27,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2,
>   static inline int
>   rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>   {
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
>   		rte_hash_k16_cmp_eq((const char *) key1 + 16,
>   				(const char *) key2 + 16, key_len);
>   }
> -
> -static inline int
> -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 16,
> -				(const char *) key2 + 16, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 32,
> -				(const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 32,
> -				(const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 96,
> -				(const char *) key2 + 96, key_len);
> -}
> -
> -static inline int
> -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k64_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> diff --git a/lib/hash/rte_cmp_generic.h b/lib/hash/rte_cmp_generic.h
> new file mode 100644
> index 0000000000..f846d562e3
> --- /dev/null
> +++ b/lib/hash/rte_cmp_generic.h
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2025 Stephen Hemminger
> + */
> +
> +#ifndef _RTE_CMP_GENERIC_H_
> +#define _RTE_CMP_GENERIC_H_
> +
> +/* Function to compare 16 byte keys */
> +static inline int
> +rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> +{
> +#ifdef RTE_ARCH_64
> +	const uint64_t *k1 = key1;
> +	const unaligned_uint64_t *k2 = key2;
> +
> +	return ((k1[0] ^ k2[0]) | (k1[1] ^ k2[1])) != 0;

Remove '!= 0'.

> +#else
> +	const uint32_t *k1 = key1;
> +	const unaligned_uint32_t *k2 = key2;
> +
> +	return (k1[0] ^ k2[0]) | (k1[1] ^ k2[1]) |
> +	       (k1[2] ^ k2[2]) | (k1[3] ^ k2[3]);
> +#endif
> +}
> +
> +/* Function to compare 32 byte keys */
> +static inline int
> +rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)

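Renaming these functions from _eq to _neq (they return non-zero when the keys differ) is not worth the trouble here, correct? That would be for some other patch set.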

> +{
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> +				(const uint8_t *) key2 + 16, key_len);
> +}
> +
> +#endif
> diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h
> index ddfbef462f..e7a38c8fcd 100644
> --- a/lib/hash/rte_cmp_x86.h
> +++ b/lib/hash/rte_cmp_x86.h
> @@ -4,7 +4,7 @@
>   
>   #include <rte_vect.h>
>   
> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> +/* Function to compare multiple of 16 byte keys */
>   static inline int
>   rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
>   {
> @@ -18,59 +18,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unu
>   static inline int
>   rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>   {
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 16,
> -				(const char *) key2 + 16, key_len);
> -}
> -
> -static inline int
> -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 16,
> -				(const char *) key2 + 16, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 32,
> -				(const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 32,
> -				(const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 96,
> -				(const char *) key2 + 96, key_len);
> -}
> -
> -static inline int
> -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k64_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> +				(const uint8_t *) key2 + 16, key_len);
>   }
> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> index 619fe0c691..199cb62bf0 100644
> --- a/lib/hash/rte_cuckoo_hash.c
> +++ b/lib/hash/rte_cuckoo_hash.c
> @@ -42,13 +42,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
>   #define RETURN_IF_TRUE(cond, retval)
>   #endif
>   
> -#if defined(RTE_ARCH_X86)
> -#include "rte_cmp_x86.h"
> -#endif
> -
> -#if defined(RTE_ARCH_ARM64)
> -#include "rte_cmp_arm64.h"
> -#endif
>   
>   /*
>    * All different options to select a key compare function,
> @@ -57,7 +50,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
>    */
>   enum cmp_jump_table_case {
>   	KEY_CUSTOM = 0,
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
>   	KEY_16_BYTES,
>   	KEY_32_BYTES,
>   	KEY_48_BYTES,
> @@ -66,11 +58,85 @@ enum cmp_jump_table_case {
>   	KEY_96_BYTES,
>   	KEY_112_BYTES,
>   	KEY_128_BYTES,
> -#endif
>   	KEY_OTHER_BYTES,
>   	NUM_KEY_CMP_CASES,
>   };
>   
> +/*
> + * Comparison functions for different key sizes.
> + * Each function is only called with a specific fixed key size.
> + *
> + * Return value is 0 on equality to allow direct use of memcmp.
> + * Recommend using XOR and | operator to avoid branching
> + * as long as key is smaller than cache line size.
> + *
> + * Key1 always points to key[] in rte_hash_key which is aligned.
> + * Key2 is parameter to insert which might not be.
> + *
> + * Special case for 16 and 32 bytes to allow for architecture
> + * specific optimizations.
> + */
> +
> +#if defined(RTE_ARCH_X86)
> +#include "rte_cmp_x86.h"
> +#elif defined(RTE_ARCH_ARM64)
> +#include "rte_cmp_arm64.h"
> +#else
> +#include "rte_cmp_generic.h"
> +#endif
> +
> +static int
> +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> +				    (const uint8_t *) key2 + 16, key_len) ||

Should this '||' be '|', to match the '|' on the first line of the expression and avoid branching?

> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 32,
> +				    (const uint8_t *) key2 + 32, key_len);
> +}
> +
> +static int
> +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k32_cmp_eq(key1, key2, key_len) |
> +		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 32,
> +				    (const uint8_t *) key2 + 32, key_len);
> +}
> +
> +static int
> +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||

'||' -> '|'

The same applies to the _eq functions below (k96, k112 and k128).

> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 64,
> +				    (const uint8_t *) key2 + 64, key_len);
> +}
> +
> +static int
> +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
> +				    (const uint8_t *) key2 + 64, key_len);
> +}
> +
> +static int
> +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
> +				    (const uint8_t *) key2 + 64, key_len) ||
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 96,
> +				    (const uint8_t *) key2 + 96, key_len);
> +}
> +
> +static int
> +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k64_cmp_eq((const uint8_t *) key1 + 64,
> +				(const uint8_t *) key2 + 64, key_len);
> +}
> +
>   /* Enum used to select the implementation of the signature comparison function to use
>    * eg: a system supporting SVE might want to use a NEON or scalar implementation.
>    */
> @@ -160,7 +226,6 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
>    */
>   static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
>   	[KEY_CUSTOM] = NULL,
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
>   	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
>   	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
>   	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,
> @@ -169,7 +234,6 @@ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
>   	[KEY_96_BYTES] = rte_hash_k96_cmp_eq,
>   	[KEY_112_BYTES] = rte_hash_k112_cmp_eq,
>   	[KEY_128_BYTES] = rte_hash_k128_cmp_eq,
> -#endif
>   	[KEY_OTHER_BYTES] = memcmp,
>   };
>   


  parent reply	other threads:[~2025-08-26 14:36 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-21 20:35 [RFC 0/3] hash: optimize compare logic Stephen Hemminger
2025-08-21 20:35 ` [RFC 1/3] hash: move table of hash compare functions out of header Stephen Hemminger
2025-08-22  9:05   ` Morten Brørup
2025-08-22 16:50     ` Stephen Hemminger
2025-08-21 20:35 ` [RFC 2/3] hash: reduce architecture special cases Stephen Hemminger
2025-08-22  9:20   ` Morten Brørup
2025-08-21 20:35 ` [RFC 3/3] hash: add support for common small key sizes Stephen Hemminger
2025-08-22  7:19   ` Mattias Rönnblom
2025-08-22  9:50     ` Morten Brørup
2025-08-22 15:05       ` Mattias Rönnblom
2025-08-22 18:57         ` Morten Brørup
2025-08-25  6:05           ` Mattias Rönnblom
2025-08-22 16:12     ` Stephen Hemminger
2025-08-22 18:19 ` [PATCH v2 0/4] Cuckoo hash cleanup and optimizations Stephen Hemminger
2025-08-22 18:19   ` [PATCH v2 1/4] hash: move table of hash compare functions out of header Stephen Hemminger
2025-08-22 18:19   ` [PATCH v2 2/4] hash: use static_assert Stephen Hemminger
2025-08-26  6:58     ` Morten Brørup
2025-08-22 18:19   ` [PATCH v2 3/4] hash: reduce architecture special cases Stephen Hemminger
2025-08-26  6:55     ` Morten Brørup
2025-08-26 13:41       ` Stephen Hemminger
2025-08-26 14:13         ` Morten Brørup
2025-08-26 14:22           ` Stephen Hemminger
2025-08-26 14:36     ` Mattias Rönnblom [this message]
2025-08-26 16:25       ` Stephen Hemminger
2025-08-22 18:19   ` [PATCH v2 4/4] hash: add support for common small key sizes Stephen Hemminger
2025-08-26  6:58     ` Morten Brørup

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=05cd927d-e4e9-441f-b247-0bdc138c8de0@lysator.liu.se \
    --to=hofors@lysator.liu.se \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=sameh.gobriel@intel.com \
    --cc=stephen@networkplumber.org \
    --cc=vladimir.medvedkin@intel.com \
    --cc=wathsala.vithanage@arm.com \
    --cc=yipeng1.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).