From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
To: pbhagavatula@marvell.com
Cc: Honnappa.Nagarahalli@arm.com, bruce.richardson@intel.com,
dev@dpdk.org, jerinj@marvell.com, konstantin.v.ananyev@yandex.ru,
nd@arm.com, ruifeng.wang@arm.com, sameh.gobriel@intel.com,
vladimir.medvedkin@intel.com, yipeng1.wang@intel.com
Subject: Re: [PATCH v5 1/2] ip_frag: optimize key compare and hash generation
Date: Tue, 27 Jun 2023 10:23:32 +0100 [thread overview]
Message-ID: <2ed10713-4811-9eab-b8de-d055795c6bda@yandex.ru> (raw)
In-Reply-To: <20230602170147.4828-1-pbhagavatula@marvell.com>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Use optimized rte_hash_k32_cmp_eq routine for key comparison for
> x86 and ARM64.
> Use CRC instructions for hash generation on ARM64.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> On Neoverse-N2, performance improved by 10% when measured with
> examples/ip_reassembly.
>
> v5 Changes:
> - Fix spellcheck.
> v4 Changes:
> - Fix compilation failures (sys/queue)
> - Update test case to use proper macros.
> v3 Changes:
> - Drop NEON patch.
> v2 Changes:
> - Fix compilation failure with non ARM64/x86 targets
>
> lib/hash/rte_cmp_arm64.h | 16 ++++++++--------
> lib/hash/rte_cmp_x86.h | 16 ++++++++--------
> lib/ip_frag/ip_frag_common.h | 14 ++++++++++++++
> lib/ip_frag/ip_frag_internal.c | 4 ++--
> 4 files changed, 32 insertions(+), 18 deletions(-)
>
> diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h
> index e9e26f9abd..a3e85635eb 100644
> --- a/lib/hash/rte_cmp_arm64.h
> +++ b/lib/hash/rte_cmp_arm64.h
> @@ -3,7 +3,7 @@
> */
>
> /* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> -static int
> +static inline int
> rte_hash_k16_cmp_eq(const void *key1, const void *key2,
> size_t key_len __rte_unused)
> {
> @@ -24,7 +24,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2,
> return !(x0 == 0 && x1 == 0);
> }
>
> -static int
> +static inline int
> rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> @@ -32,7 +32,7 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 16, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> @@ -42,7 +42,7 @@ rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 32, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
> @@ -50,7 +50,7 @@ rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 32, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> @@ -58,7 +58,7 @@ rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 64, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> @@ -66,7 +66,7 @@ rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 64, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> @@ -76,7 +76,7 @@ rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 96, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h
> index 13a5836351..ddfbef462f 100644
> --- a/lib/hash/rte_cmp_x86.h
> +++ b/lib/hash/rte_cmp_x86.h
> @@ -5,7 +5,7 @@
> #include <rte_vect.h>
>
> /* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> -static int
> +static inline int
> rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> {
> const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
> @@ -15,7 +15,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unu
> return !_mm_test_all_zeros(x, x);
> }
>
> -static int
> +static inline int
> rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> @@ -23,7 +23,7 @@ rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 16, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> @@ -33,7 +33,7 @@ rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 32, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
> @@ -41,7 +41,7 @@ rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 32, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> @@ -49,7 +49,7 @@ rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 64, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> @@ -57,7 +57,7 @@ rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 64, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> @@ -67,7 +67,7 @@ rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> (const char *) key2 + 96, key_len);
> }
>
> -static int
> +static inline int
> rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
> index 0d8ce6a1e1..7d6c1aa98d 100644
> --- a/lib/ip_frag/ip_frag_common.h
> +++ b/lib/ip_frag/ip_frag_common.h
> @@ -7,6 +7,14 @@
>
> #include <sys/queue.h>
>
> +#include <rte_common.h>
> +
> +#if defined(RTE_ARCH_ARM64)
> +#include <rte_cmp_arm64.h>
> +#elif defined(RTE_ARCH_X86)
> +#include <rte_cmp_x86.h>
> +#endif
> +
> #include "rte_ip_frag.h"
> #include "ip_reassembly.h"
>
> @@ -75,12 +83,18 @@ ip_frag_key_invalidate(struct ip_frag_key * key)
> static inline uint64_t
> ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2)
> {
> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> + return (k1->id_key_len != k2->id_key_len) ||
> + (k1->key_len == IPV4_KEYLEN ? k1->src_dst[0] != k2->src_dst[0] :
> + rte_hash_k32_cmp_eq(k1, k2, 32));
> +#else
> uint32_t i;
> uint64_t val;
> val = k1->id_key_len ^ k2->id_key_len;
> for (i = 0; i < k1->key_len; i++)
> val |= k1->src_dst[i] ^ k2->src_dst[i];
> return val;
> +#endif
> }
>
> /*
> diff --git a/lib/ip_frag/ip_frag_internal.c b/lib/ip_frag/ip_frag_internal.c
> index b436a4c931..7cbef647df 100644
> --- a/lib/ip_frag/ip_frag_internal.c
> +++ b/lib/ip_frag/ip_frag_internal.c
> @@ -45,7 +45,7 @@ ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
>
> p = (const uint32_t *)&key->src_dst;
>
> -#ifdef RTE_ARCH_X86
> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
> v = rte_hash_crc_4byte(p[1], v);
> v = rte_hash_crc_4byte(key->id, v);
> @@ -66,7 +66,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
>
> p = (const uint32_t *) &key->src_dst;
>
> -#ifdef RTE_ARCH_X86
> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
> v = rte_hash_crc_4byte(p[1], v);
> v = rte_hash_crc_4byte(p[2], v);
> --
Acked-by: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> 2.25.1
>
>
next prev parent reply other threads: [~2023-06-27 9:23 UTC | newest]
Thread overview: 28+ messages / expand [flat | nested] mbox.gz Atom feed top
2023-05-23 12:54 [PATCH 1/3] " pbhagavatula
2023-05-23 12:54 ` [PATCH 2/3] ip_frag: improve reassembly lookup performance pbhagavatula
2023-05-23 12:54 ` [PATCH 3/3] test: add reassembly perf test pbhagavatula
2023-05-23 14:39 ` [PATCH v2 1/3] ip_frag: optimize key compare and hash generation pbhagavatula
2023-05-23 14:39 ` [PATCH v2 2/3] ip_frag: improve reassembly lookup performance pbhagavatula
2023-05-23 16:22 ` Honnappa Nagarahalli
2023-05-23 17:58 ` Pavan Nikhilesh Bhagavatula
2023-05-23 22:23 ` Pavan Nikhilesh Bhagavatula
2023-05-23 22:30 ` Stephen Hemminger
2023-05-29 13:17 ` [EXT] " Pavan Nikhilesh Bhagavatula
2023-05-23 14:39 ` [PATCH v2 3/3] test: add reassembly perf test pbhagavatula
2023-05-29 14:55 ` [PATCH v3 1/2] ip_frag: optimize key compare and hash generation pbhagavatula
2023-05-29 14:55 ` [PATCH v3 2/2] test: add reassembly perf test pbhagavatula
2023-05-30 10:51 ` [EXT] " Amit Prakash Shukla
2023-05-30 3:09 ` [PATCH v3 1/2] ip_frag: optimize key compare and hash generation Stephen Hemminger
2023-05-30 17:50 ` [EXT] " Pavan Nikhilesh Bhagavatula
2023-05-30 7:44 ` Ruifeng Wang
2023-05-31 4:26 ` [PATCH v4 " pbhagavatula
2023-05-31 4:26 ` [PATCH v4 2/2] test: add reassembly perf test pbhagavatula
2023-06-05 11:12 ` Константин Ананьев
2023-06-02 17:01 ` [PATCH v5 1/2] ip_frag: optimize key compare and hash generation pbhagavatula
2023-06-02 17:01 ` [PATCH v5 2/2] test: add reassembly perf test pbhagavatula
2023-06-27 9:36 ` Konstantin Ananyev
2023-06-05 11:09 ` [PATCH v5 1/2] ip_frag: optimize key compare and hash generation Константин Ананьев
2023-06-27 9:23 ` Konstantin Ananyev [this message]
2023-07-11 16:52 ` [PATCH v6 " pbhagavatula
2023-07-11 16:52 ` [PATCH v6 2/2] test: add reassembly perf test pbhagavatula
2023-07-12 14:59 ` [PATCH v6 1/2] ip_frag: optimize key compare and hash generation Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2ed10713-4811-9eab-b8de-d055795c6bda@yandex.ru \
--to=konstantin.v.ananyev@yandex.ru \
--cc=Honnappa.Nagarahalli@arm.com \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=jerinj@marvell.com \
--cc=nd@arm.com \
--cc=pbhagavatula@marvell.com \
--cc=ruifeng.wang@arm.com \
--cc=sameh.gobriel@intel.com \
--cc=vladimir.medvedkin@intel.com \
--cc=yipeng1.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).