From: Ravi Kerur <rkerur@gmail.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructions
Date: Wed, 22 Apr 2015 08:33:48 -0700 [thread overview]
Message-ID: <1429716828-19012-2-git-send-email-rkerur@gmail.com> (raw)
In-Reply-To: <1429716828-19012-1-git-send-email-rkerur@gmail.com>
This patch replaces memcmp and strncmp in librte_hash with rte_memcmp which
is implemented with AVX/SSE instructions.
Preliminary results on Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, Ubuntu
14.04 x86_64 shows
1 second improvement when hash key length <= 64
4 seconds improvement when hash key length <= 128
This patch is an RFC to engage the team and improve performance further.
Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
app/test/test_hash.c | 2 +-
app/test/test_hash_perf.c | 302 +++++++------
.../common/include/arch/ppc_64/rte_memcmp.h | 62 +++
.../common/include/arch/x86/rte_memcmp.h | 479 +++++++++++++++++++++
lib/librte_eal/common/include/generic/rte_memcmp.h | 119 +++++
lib/librte_hash/rte_fbk_hash.c | 7 +-
lib/librte_hash/rte_hash.c | 15 +-
lib/librte_hash/rte_hash.h | 2 +-
8 files changed, 857 insertions(+), 131 deletions(-)
create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..719c135 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -65,7 +65,7 @@
*/
static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 40, 42, 48, 54, 60, 63, 64, 128};
/******************************************************************************/
#define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..cccf70f 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -95,7 +95,7 @@ struct tbl_perf_test_params {
static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 40, 42, 48, 54, 60, 63, 64, 128};
/******************************************************************************/
/*******************************************************************************
@@ -125,6 +125,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
{ ADD_ON_EMPTY, 1024, 1024, 4, 64, rte_jhash, 0},
{ ADD_ON_EMPTY, 1024, 1024, 8, 64, rte_jhash, 0},
{ ADD_ON_EMPTY, 1024, 1024, 16, 64, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 1, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 2, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 4, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 8, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 16, 128, rte_jhash, 0},
/* Small table, update */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
{ ADD_UPDATE, ITERATIONS, 1024, 1, 16, rte_jhash, 0},
@@ -147,6 +152,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
{ ADD_UPDATE, ITERATIONS, 1024, 4, 64, rte_jhash, 0},
{ ADD_UPDATE, ITERATIONS, 1024, 8, 64, rte_jhash, 0},
{ ADD_UPDATE, ITERATIONS, 1024, 16, 64, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 1, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 2, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 4, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 8, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 16, 128, rte_jhash, 0},
/* Small table, lookup */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
{ LOOKUP, ITERATIONS, 1024, 1, 16, rte_jhash, 0},
@@ -169,6 +179,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
{ LOOKUP, ITERATIONS, 1024, 4, 64, rte_jhash, 0},
{ LOOKUP, ITERATIONS, 1024, 8, 64, rte_jhash, 0},
{ LOOKUP, ITERATIONS, 1024, 16, 64, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1024, 1, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1024, 2, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1024, 4, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1024, 8, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1024, 16, 128, rte_jhash, 0},
/* Big table, add */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
{ ADD_ON_EMPTY, 1048576, 1048576, 1, 16, rte_jhash, 0},
@@ -191,6 +206,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
{ ADD_ON_EMPTY, 1048576, 1048576, 4, 64, rte_jhash, 0},
{ ADD_ON_EMPTY, 1048576, 1048576, 8, 64, rte_jhash, 0},
{ ADD_ON_EMPTY, 1048576, 1048576, 16, 64, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 1, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 2, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 4, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 8, 128, rte_jhash, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 16, 128, rte_jhash, 0},
/* Big table, update */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
{ ADD_UPDATE, ITERATIONS, 1048576, 1, 16, rte_jhash, 0},
@@ -213,6 +233,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
{ ADD_UPDATE, ITERATIONS, 1048576, 4, 64, rte_jhash, 0},
{ ADD_UPDATE, ITERATIONS, 1048576, 8, 64, rte_jhash, 0},
{ ADD_UPDATE, ITERATIONS, 1048576, 16, 64, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 1, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 2, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 4, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 8, 128, rte_jhash, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 16, 128, rte_jhash, 0},
/* Big table, lookup */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
{ LOOKUP, ITERATIONS, 1048576, 1, 16, rte_jhash, 0},
@@ -235,138 +260,173 @@ struct tbl_perf_test_params tbl_perf_params[] =
{ LOOKUP, ITERATIONS, 1048576, 4, 64, rte_jhash, 0},
{ LOOKUP, ITERATIONS, 1048576, 8, 64, rte_jhash, 0},
{ LOOKUP, ITERATIONS, 1048576, 16, 64, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1048576, 1, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1048576, 2, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1048576, 4, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1048576, 8, 128, rte_jhash, 0},
+{ LOOKUP, ITERATIONS, 1048576, 16, 128, rte_jhash, 0},
/* Small table, add */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{ ADD_ON_EMPTY, 1024, 1024, 1, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 2, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 4, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 8, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 16, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 1, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 2, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 4, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 8, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 16, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 1, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 2, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 4, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 8, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 16, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 1, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 2, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 4, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 8, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1024, 1024, 16, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 1, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 2, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 4, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 8, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 16, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 1, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 2, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 4, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 8, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 16, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 1, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 2, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 4, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 8, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 16, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 1, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 2, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 4, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 8, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 16, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 1, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 2, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 4, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 8, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1024, 1024, 16, 128, rte_hash_crc, 0},
/* Small table, update */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{ ADD_UPDATE, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 1, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 2, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 4, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 8, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1024, 16, 128, rte_hash_crc, 0},
/* Small table, lookup */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{ LOOKUP, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 1, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 2, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 4, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 8, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1024, 16, 128, rte_hash_crc, 0},
/* Big table, add */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{ ADD_ON_EMPTY, 1048576, 1048576, 1, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 2, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 4, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 8, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 16, 16, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 1, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 2, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 4, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 8, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 16, 32, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 1, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 2, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 4, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 8, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 16, 48, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 1, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 2, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 4, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 8, 64, rte_hash_crc, 0},
-{ ADD_ON_EMPTY, 1048576, 1048576, 16, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 1, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 2, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 4, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 8, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 16, 16, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 1, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 2, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 4, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 8, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 16, 32, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 1, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 2, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 4, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 8, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 16, 48, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 1, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 2, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 4, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 8, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 16, 64, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 1, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 2, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 4, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 8, 128, rte_hash_crc, 0},
+{ ADD_ON_EMPTY, 1048576, 1048576, 16, 128, rte_hash_crc, 0},
/* Big table, update */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{ ADD_UPDATE, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0},
-{ ADD_UPDATE, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 1, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 2, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 4, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 8, 128, rte_hash_crc, 0},
+{ ADD_UPDATE, ITERATIONS, 1048576, 16, 128, rte_hash_crc, 0},
/* Big table, lookup */
/* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{ LOOKUP, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0},
-{ LOOKUP, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 1, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 2, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 4, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 8, 128, rte_hash_crc, 0},
+{ LOOKUP, ITERATIONS, 1048576, 16, 128, rte_hash_crc, 0},
};
/******************************************************************************/
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..7f99ee1
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/* To include altivec.h, the GCC version must be >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
+/* Yields zero when the n bytes match, non-zero otherwise (GNU statement expr). */
+#define rte_memcmp(dst, src, n) \
+	({ (__builtin_constant_p(n)) ? memcmp((dst), (src), (n)) : \
+	   rte_memcmp_func((dst), (src), (n)); })
+/* Non-constant-length path: true when the n bytes differ, false when equal. */
+static inline bool
+rte_memcmp_func(const void *dst, const void *src, size_t n)
+{
+	return memcmp(dst, src, n) != 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..8ea34c0
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,479 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compare bytes between two locations for equality (not ordering).
+ *
+ * @note Unlike memcmp(), the result only reports whether the regions differ;
+ * it carries no less-than/greater-than information.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * false (0) if the regions are equal, true otherwise.
+ */
+static inline bool
+rte_memcmp(const void *src_1, const void *src_2,
+	size_t n) __attribute__((always_inline));
+
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * AVX2 implementation below
+ */
+
+/**
+ * Compare 16 bytes at src_1 and src_2 using unaligned 128-bit loads.
+ * Returns false when all 16 bytes match and true when any byte differs.
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2)
+{
+	const __m128i lhs = _mm_loadu_si128((const __m128i *)src_1);
+	const __m128i rhs = _mm_loadu_si128((const __m128i *)src_2);
+
+	/* Every equal 16-bit lane contributes two set bits to the byte mask. */
+	const uint32_t eq_mask =
+		(uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi16(lhs, rhs));
+
+	/* All 16 mask bits set means every lane matched. */
+	if (eq_mask == 0xffffU)
+		return false;
+	return true;
+}
+
+/**
+ * Compare 32 bytes at src_1 and src_2 using unaligned 256-bit loads.
+ * Returns false when all 32 bytes match and true when any byte differs.
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2)
+{
+	__m256i ymm0;
+	__m256i ymm1;
+	uint32_t vmask;
+
+	ymm0 = _mm256_loadu_si256((const __m256i *)src_1);
+	ymm1 = _mm256_loadu_si256((const __m256i *)src_2);
+
+	/* 32-bit mask: a 64-bit one would sign-extend the all-equal result (-1). */
+	vmask = (uint32_t)_mm256_movemask_epi8(
+			_mm256_cmpeq_epi32(ymm0, ymm1));
+	return vmask != 0xffffffffU;
+}
+
+/**
+ * Compare 64 bytes at src_1 and src_2 as two 32-byte chunks.
+ * Returns false when all 64 bytes match, true at the first differing chunk.
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2)
+{
+	size_t off;
+
+	for (off = 0; off < 64; off += 32) {
+		if (likely(rte_cmp32(src_1 + off, src_2 + off) == 0))
+			continue;
+		return true;
+	}
+	return false;
+}
+
+/**
+ * Compare 128 bytes at src_1 and src_2 in four 32-byte chunks.
+ *
+ * @return
+ *   false when all 128 bytes match, true as soon as a chunk differs
+ *   (chunks after the first mismatch are not read).
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2)
+{
+	size_t off;
+
+	/* Chunks are visited in address order. */
+	for (off = 0; off < 128; off += 32) {
+		if (likely(rte_cmp32(src_1 + off, src_2 + off) == 0))
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Compare the trailing (n & 15) bytes at _src_1 and _src_2.
+ *
+ * Only the low four bits of n are used: they select 1, 2, 4 and 8 byte pieces
+ * that are compared in that order. Pieces are loaded with memcpy() so that
+ * unaligned addresses and type punning stay well defined.
+ *
+ * @return
+ *   false when every selected piece matches, true otherwise.
+ */
+static inline bool
+rte_memcmp_remainder(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	size_t width;
+
+	for (width = 1; width <= 8; width <<= 1) {
+		uint64_t lhs = 0;
+		uint64_t rhs = 0;
+
+		/* Skip piece sizes whose bit is clear in n. */
+		if ((n & width) == 0)
+			continue;
+
+		memcpy(&lhs, src_1, width);
+		memcpy(&rhs, src_2, width);
+		if (lhs != rhs)
+			return true;
+
+		src_1 += width;
+		src_2 += width;
+	}
+	return false;
+}
+
+/**
+ * Compare n bytes at _src_1 and _src_2 for equality (AVX2 build).
+ *
+ * Lengths below 16 go to rte_memcmp_remainder(). Longer lengths are covered
+ * by a leading fixed-size compare plus a trailing compare anchored at the
+ * end of the buffers; the two may overlap, which is harmless for equality
+ * and avoids any read outside [src, src + n).
+ *
+ * @param _src_1
+ *   Pointer to the first buffer.
+ * @param _src_2
+ *   Pointer to the second buffer.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   false (0) when the buffers match, true when any byte differs.
+ */
+static inline bool
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	bool ret;
+
+	/**
+	 * Compare less than 16 bytes
+	 */
+	if (n < 16)
+		return rte_memcmp_remainder(_src_1, _src_2, n);
+
+	/**
+	 * 16..32 bytes: one 32-byte compare, or head and tail 16-byte compares
+	 */
+	if (n <= 32) {
+		if (likely(n == 32))
+			return rte_cmp32(src_1, src_2);
+
+		ret = rte_cmp16(src_1, src_2);
+		if (likely(ret == 0) && n > 16)
+			ret = rte_cmp16(src_1 + (n - 16), src_2 + (n - 16));
+		return ret;
+	}
+
+	/**
+	 * 33..64 bytes: head and tail 32-byte compares
+	 */
+	if (n <= 64) {
+		ret = rte_cmp32(src_1, src_2);
+		if (likely(ret == 0))
+			ret = rte_cmp32(src_1 + (n - 32), src_2 + (n - 32));
+		return ret;
+	}
+
+	/**
+	 * 65..128 bytes: head and tail 64-byte compares
+	 */
+	if (n <= 128) {
+		ret = rte_cmp64(src_1, src_2);
+		if (likely(ret == 0))
+			ret = rte_cmp64(src_1 + (n - 64), src_2 + (n - 64));
+		return ret;
+	}
+
+	/**
+	 * More than 128 bytes: whole 128-byte blocks, then a tail block that
+	 * ends exactly at the last byte (it may overlap the previous block)
+	 */
+	while (n > 128) {
+		ret = rte_cmp128(src_1, src_2);
+		if (ret != 0)
+			return ret;
+		src_1 += 128;
+		src_2 += 128;
+		n -= 128;
+	}
+
+	return rte_cmp128(src_1 - (128 - n), src_2 - (128 - n));
+}
+
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * SSE & AVX implementation below
+ */
+
+/**
+ * Compare 16 bytes between two locations.
+ * Both locations are read with unaligned 128-bit loads.
+ *
+ * @return
+ *   false (0) if all 16 bytes are identical, true (1) otherwise.
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2)
+{
+	const __m128i lhs = _mm_loadu_si128((const __m128i *)src_1);
+	const __m128i rhs = _mm_loadu_si128((const __m128i *)src_2);
+	/* Every mask bit is set only when every 16-bit lane compared equal. */
+	const uint32_t eq_mask = _mm_movemask_epi8(_mm_cmpeq_epi16(lhs, rhs));
+
+	return eq_mask != 0xffffU;
+}
+
+/**
+ * Compare 32 bytes between two locations, as two 16-byte halves.
+ *
+ * @return
+ *   false (0) if all 32 bytes are identical, true (1) otherwise.
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2)
+{
+	/* The upper half is only examined when the lower half matched. */
+	if (likely(rte_cmp16(src_1, src_2) == 0))
+		return rte_cmp16(src_1 + 16, src_2 + 16);
+
+	return 1;
+}
+
+/**
+ * Compare 64 bytes between two locations, as four 16-byte blocks.
+ *
+ * @return
+ *   false (0) if all 64 bytes are identical, true (1) otherwise.
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2)
+{
+	unsigned int off;
+
+	/* Ascending order, stopping at the first block that differs. */
+	for (off = 0; off < 4 * 16; off += 16) {
+		bool differs = rte_cmp16(src_1 + off, src_2 + off);
+
+		if (likely(differs == 0))
+			continue;
+		return differs;
+	}
+
+	return 0;
+}
+
+/**
+ * Compare 128 bytes between two locations, as two 64-byte halves.
+ *
+ * @return
+ *   false (0) if all 128 bytes are identical, true (1) otherwise.
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2)
+{
+	/* The upper half is only examined when the lower half matched. */
+	if (likely(rte_cmp64(src_1, src_2) == 0))
+		return rte_cmp64(src_1 + 64, src_2 + 64);
+
+	return 1;
+}
+
+/**
+ * Compare the first (n & 0x0f) bytes of two locations.
+ *
+ * The low four bits of n select, in this order, a 1-, 2-, 4- and 8-byte
+ * comparison; each one starts where the previous one ended. Higher bits
+ * of n are ignored.
+ *
+ * @return
+ *   false (0) if every selected piece is identical, true (1) otherwise.
+ */
+static inline bool
+rte_memcmp_remainder(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *p1 = (const uint8_t *)_src_1;
+	const uint8_t *p2 = (const uint8_t *)_src_2;
+	bool differs = 0;
+
+	if (n & 0x01) {
+		differs |= (*p1 != *p2);
+		p1 += sizeof(uint8_t);
+		p2 += sizeof(uint8_t);
+	}
+
+	if (n & 0x02) {
+		differs |= (*(const uint16_t *)p1 != *(const uint16_t *)p2);
+		p1 += sizeof(uint16_t);
+		p2 += sizeof(uint16_t);
+	}
+
+	if (n & 0x04) {
+		differs |= (*(const uint32_t *)p1 != *(const uint32_t *)p2);
+		p1 += sizeof(uint32_t);
+		p2 += sizeof(uint32_t);
+	}
+
+	if (n & 0x08)
+		differs |= (*(const uint64_t *)p1 != *(const uint64_t *)p2);
+
+	return differs;
+}
+
+/**
+ * Compare n bytes between two locations for equality.
+ *
+ * Only equality is reported: the result is a bool and, unlike memcmp(),
+ * carries no ordering information.
+ *
+ * @param _src_1
+ *   Pointer to the first buffer.
+ * @param _src_2
+ *   Pointer to the second buffer.
+ * @param n
+ *   Number of bytes to compare. Any size is handled, including sizes
+ *   above 128 bytes.
+ * @return
+ *   false (0) if the first n bytes are identical, true (1) otherwise.
+ */
+static inline bool
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	size_t off = 0;
+
+	/* Fewer than 16 bytes: no 16-byte block fits, use the scalar helper. */
+	if (n < 16)
+		return rte_memcmp_remainder(_src_1, _src_2, n);
+
+	/*
+	 * Walk forward from the start in the largest blocks that still fit.
+	 * The 128-byte step loops so that sizes above 128 are really compared
+	 * instead of being reported as equal.
+	 */
+	while (n - off >= 128) {
+		if (rte_cmp128(src_1 + off, src_2 + off))
+			return 1;
+		off += 128;
+	}
+
+	if (n - off >= 64) {
+		if (rte_cmp64(src_1 + off, src_2 + off))
+			return 1;
+		off += 64;
+	}
+
+	if (n - off >= 32) {
+		if (rte_cmp32(src_1 + off, src_2 + off))
+			return 1;
+		off += 32;
+	}
+
+	if (n - off >= 16) {
+		if (rte_cmp16(src_1 + off, src_2 + off))
+			return 1;
+		off += 16;
+	}
+
+	/*
+	 * 1..15 bytes are left: compare the last 16 bytes of the buffers.
+	 * This overlaps bytes already found equal, which is harmless, and
+	 * stays inside the buffers because n >= 16 here.
+	 */
+	if (off < n)
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+
+	return 0;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..694c659
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,119 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
+/**
+ * Compare 16 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   false (0) if the 16 bytes are identical, true otherwise (as
+ *   implemented by the x86 SSE version).
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2);
+
+/**
+ * Compare 32 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   false (0) if the 32 bytes are identical, true otherwise (as
+ *   implemented by the x86 SSE version).
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2);
+
+/**
+ * Compare 64 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   false (0) if the 64 bytes are identical, true otherwise (as
+ *   implemented by the x86 SSE version).
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2);
+
+/**
+ * Compare 128 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   false (0) if the 128 bytes are identical, true otherwise (as
+ *   implemented by the x86 SSE version).
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This may be implemented as a macro, so its address should not be
+ * taken and care is needed as parameter expressions may be evaluated
+ * multiple times. NOTE(review): the x86 version is a static inline
+ * function, not a macro; confirm for the other architectures.
+ *
+ * @param dst
+ *   Pointer to the first source of the data.
+ *   NOTE(review): the name suggests a destination, but nothing is written;
+ *   consider renaming the prototype parameters to src_1 / src_2.
+ * @param src
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   false (0) if the two locations match, true otherwise. Callers test
+ *   the result with "== 0" to detect a match.
+ */
+static bool
+rte_memcmp(const void *dst, const void *src, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcmp() function used by rte_memcmp macro.
+ *
+ * Forward declaration only; the always_inline attribute forces inlining at
+ * every call site.
+ * NOTE(review): no definition of rte_memcmp_func, and no rte_memcmp macro,
+ * is visible in this part of the patch, and the first parameter is a
+ * non-const "void *" although a comparison should not write through it.
+ * Confirm whether this declaration is still needed.
+ */
+static inline bool
+rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c
index 356ddfe..5e796c9 100644
--- a/lib/librte_hash/rte_fbk_hash.c
+++ b/lib/librte_hash/rte_fbk_hash.c
@@ -40,6 +40,7 @@
#include <sys/queue.h>
#include <rte_memory.h>
#include <rte_memzone.h>
+#include <rte_memcmp.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
@@ -83,7 +84,8 @@ rte_fbk_hash_find_existing(const char *name)
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(te, fbk_hash_list, next) {
h = (struct rte_fbk_hash_table *) te->data;
- if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0)
+ if (rte_memcmp(name, h->name,
+ RTE_MIN(strlen(name), strlen(h->name)) + 1) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
@@ -137,7 +139,8 @@ rte_fbk_hash_create(const struct rte_fbk_hash_params *params)
/* guarantee there's no existing */
TAILQ_FOREACH(te, fbk_hash_list, next) {
ht = (struct rte_fbk_hash_table *) te->data;
- if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0)
+ if (rte_memcmp(params->name, ht->name,
+ RTE_MIN(strlen(params->name), strlen(ht->name)) + 1) == 0)
break;
}
if (te != NULL)
diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
index 9245716..bd14f2b 100644
--- a/lib/librte_hash/rte_hash.c
+++ b/lib/librte_hash/rte_hash.c
@@ -42,6 +42,7 @@
#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
#include <rte_log.h>
#include <rte_memcpy.h>
+#include <rte_memcmp.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_memzone.h>
@@ -153,7 +154,8 @@ rte_hash_find_existing(const char *name)
rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(te, hash_list, next) {
h = (struct rte_hash *) te->data;
- if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
+ if (rte_memcmp(name, h->name,
+ RTE_MIN(strlen(name), strlen(h->name)) + 1) == 0)
break;
}
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
@@ -213,7 +215,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
/* guarantee there's no existing */
TAILQ_FOREACH(te, hash_list, next) {
h = (struct rte_hash *) te->data;
- if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
+ if (rte_memcmp(params->name, h->name,
+ RTE_MIN(strlen(params->name), strlen(h->name)) + 1) == 0)
break;
}
if (te != NULL)
@@ -309,7 +312,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
+ likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i),
h->key_len) == 0)) {
return bucket_index * h->bucket_entries + i;
}
@@ -359,7 +362,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
+ likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i),
h->key_len) == 0)) {
sig_bucket[i] = NULL_SIGNATURE;
return bucket_index * h->bucket_entries + i;
@@ -401,7 +404,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
+ likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i),
h->key_len) == 0)) {
return bucket_index * h->bucket_entries + i;
}
@@ -457,7 +460,7 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
for (j = 0; j < h->bucket_entries; j++) {
if ((sigs[i] == sig_bucket[j]) &&
- likely(memcmp(keys[i],
+ likely(rte_memcmp(keys[i],
get_key_from_bucket(h, key_bucket, j),
h->key_len) == 0)) {
positions[i] = bucket_index *
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 821a9d4..d335d0b 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -54,7 +54,7 @@ extern "C" {
#define RTE_HASH_BUCKET_ENTRIES_MAX 16
/** Maximum length of key that can be used. */
-#define RTE_HASH_KEY_LENGTH_MAX 64
+#define RTE_HASH_KEY_LENGTH_MAX 128
/** Max number of keys that can be searched for using rte_hash_lookup_multi. */
#define RTE_HASH_LOOKUP_BULK_MAX 16
--
1.9.1
next prev parent reply other threads:[~2015-04-22 15:33 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-22 15:33 [dpdk-dev] [PATCH] Implement rte_memcmp with AVX/SSE instructions Ravi Kerur
2015-04-22 15:33 ` Ravi Kerur [this message]
2015-04-23 7:24 ` [dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructio Pawel Wodkowski
2015-04-23 8:11 ` Bruce Richardson
2015-04-23 8:21 ` Luke Gorrie
2015-04-23 9:23 ` Ananyev, Konstantin
2015-04-23 13:53 ` Ravi Kerur
2015-04-23 14:00 ` Bruce Richardson
2015-04-23 22:26 ` Ravi Kerur
2015-05-05 21:56 ` Ravi Kerur
2015-04-23 13:43 ` Ravi Kerur
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1429716828-19012-2-git-send-email-rkerur@gmail.com \
--to=rkerur@gmail.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).