DPDK patches and discussions
 help / color / mirror / Atom feed
From: Ravi Kerur <rkerur@gmail.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructions
Date: Wed, 22 Apr 2015 08:33:48 -0700	[thread overview]
Message-ID: <1429716828-19012-2-git-send-email-rkerur@gmail.com> (raw)
In-Reply-To: <1429716828-19012-1-git-send-email-rkerur@gmail.com>

This patch replaces memcmp and strncmp in librte_hash with rte_memcmp which
is implemented with AVX/SSE instructions.

Preliminary results on Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, Ubuntu
14.04 x86_64 show:

1 second improvement when hash key length <= 64
4 seconds improvement when hash key length <= 128

This patch is an RFC to engage the team and improve performance further.

Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
 app/test/test_hash.c                               |   2 +-
 app/test/test_hash_perf.c                          | 302 +++++++------
 .../common/include/arch/ppc_64/rte_memcmp.h        |  62 +++
 .../common/include/arch/x86/rte_memcmp.h           | 479 +++++++++++++++++++++
 lib/librte_eal/common/include/generic/rte_memcmp.h | 119 +++++
 lib/librte_hash/rte_fbk_hash.c                     |   7 +-
 lib/librte_hash/rte_hash.c                         |  15 +-
 lib/librte_hash/rte_hash.h                         |   2 +-
 8 files changed, 857 insertions(+), 131 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h

diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..719c135 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -65,7 +65,7 @@
  */
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
 static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 40, 42, 48, 54, 60, 63, 64, 128};
 /******************************************************************************/
 #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
 
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..cccf70f 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -95,7 +95,7 @@ struct tbl_perf_test_params {
 
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
 static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 40, 42, 48, 54, 60, 63, 64, 128};
 /******************************************************************************/
 
 /*******************************************************************************
@@ -125,6 +125,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
 { ADD_ON_EMPTY,        1024,     1024,           4,      64,     rte_jhash,  0},
 { ADD_ON_EMPTY,        1024,     1024,           8,      64,     rte_jhash,  0},
 { ADD_ON_EMPTY,        1024,     1024,          16,      64,     rte_jhash,  0},
+{ ADD_ON_EMPTY,        1024,     1024,           1,      128,    rte_jhash,  0},
+{ ADD_ON_EMPTY,        1024,     1024,           2,      128,    rte_jhash,  0},
+{ ADD_ON_EMPTY,        1024,     1024,           4,      128,    rte_jhash,  0},
+{ ADD_ON_EMPTY,        1024,     1024,           8,      128,    rte_jhash,  0},
+{ ADD_ON_EMPTY,        1024,     1024,          16,      128,    rte_jhash,  0},
 /* Small table, update */
 /*  Test type | Iterations | Entries | BucketSize | KeyLen |     HashFunc | InitVal */
 {   ADD_UPDATE,  ITERATIONS,     1024,           1,      16,     rte_jhash,  0},
@@ -147,6 +152,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
 {   ADD_UPDATE,  ITERATIONS,     1024,           4,      64,     rte_jhash,  0},
 {   ADD_UPDATE,  ITERATIONS,     1024,           8,      64,     rte_jhash,  0},
 {   ADD_UPDATE,  ITERATIONS,     1024,          16,      64,     rte_jhash,  0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           1,      128,    rte_jhash,  0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           2,      128,    rte_jhash,  0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           4,      128,    rte_jhash,  0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           8,      128,    rte_jhash,  0},
+{   ADD_UPDATE,  ITERATIONS,     1024,          16,      128,    rte_jhash,  0},
 /* Small table, lookup */
 /*  Test type | Iterations | Entries | BucketSize | KeyLen |     HashFunc | InitVal */
 {       LOOKUP,  ITERATIONS,     1024,           1,      16,     rte_jhash,  0},
@@ -169,6 +179,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
 {       LOOKUP,  ITERATIONS,     1024,           4,      64,     rte_jhash,  0},
 {       LOOKUP,  ITERATIONS,     1024,           8,      64,     rte_jhash,  0},
 {       LOOKUP,  ITERATIONS,     1024,          16,      64,     rte_jhash,  0},
+{       LOOKUP,  ITERATIONS,     1024,           1,      128,    rte_jhash,  0},
+{       LOOKUP,  ITERATIONS,     1024,           2,      128,    rte_jhash,  0},
+{       LOOKUP,  ITERATIONS,     1024,           4,      128,    rte_jhash,  0},
+{       LOOKUP,  ITERATIONS,     1024,           8,      128,    rte_jhash,  0},
+{       LOOKUP,  ITERATIONS,     1024,          16,      128,    rte_jhash,  0},
 /* Big table, add */
 /* Test type  | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
 { ADD_ON_EMPTY,     1048576,  1048576,           1,      16,    rte_jhash,   0},
@@ -191,6 +206,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
 { ADD_ON_EMPTY,     1048576,  1048576,           4,      64,    rte_jhash,   0},
 { ADD_ON_EMPTY,     1048576,  1048576,           8,      64,    rte_jhash,   0},
 { ADD_ON_EMPTY,     1048576,  1048576,          16,      64,    rte_jhash,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           1,      128,   rte_jhash,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           2,      128,   rte_jhash,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           4,      128,   rte_jhash,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           8,      128,   rte_jhash,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,          16,      128,   rte_jhash,   0},
 /* Big table, update */
 /* Test type  | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
 {   ADD_UPDATE,  ITERATIONS,  1048576,           1,      16,    rte_jhash,   0},
@@ -213,6 +233,11 @@ struct tbl_perf_test_params tbl_perf_params[] =
 {   ADD_UPDATE,  ITERATIONS,  1048576,           4,      64,    rte_jhash,   0},
 {   ADD_UPDATE,  ITERATIONS,  1048576,           8,      64,    rte_jhash,   0},
 {   ADD_UPDATE,  ITERATIONS,  1048576,          16,      64,    rte_jhash,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      128,   rte_jhash,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      128,   rte_jhash,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      128,   rte_jhash,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      128,   rte_jhash,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      128,   rte_jhash,   0},
 /* Big table, lookup */
 /* Test type  | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
 {       LOOKUP,  ITERATIONS,  1048576,           1,      16,    rte_jhash,   0},
@@ -235,138 +260,173 @@ struct tbl_perf_test_params tbl_perf_params[] =
 {       LOOKUP,  ITERATIONS,  1048576,           4,      64,    rte_jhash,   0},
 {       LOOKUP,  ITERATIONS,  1048576,           8,      64,    rte_jhash,   0},
 {       LOOKUP,  ITERATIONS,  1048576,          16,      64,    rte_jhash,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           1,      128,   rte_jhash,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           2,      128,   rte_jhash,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           4,      128,   rte_jhash,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           8,      128,   rte_jhash,   0},
+{       LOOKUP,  ITERATIONS,  1048576,          16,      128,   rte_jhash,   0},
 /* Small table, add */
 /*  Test type | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
-{ ADD_ON_EMPTY,        1024,     1024,           1,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           2,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           4,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           8,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,          16,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           1,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           2,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           4,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           8,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,          16,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           1,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           2,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           4,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           8,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,          16,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           1,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           2,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           4,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,           8,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,        1024,     1024,          16,      64, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           1,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           2,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           4,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           8,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,          16,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           1,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           2,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           4,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           8,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,          16,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           1,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           2,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           4,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           8,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,          16,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           1,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           2,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           4,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           8,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,          16,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           1,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           2,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           4,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,           8,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,        1024,     1024,          16,      128, rte_hash_crc,   0},
 /* Small table, update */
 /*  Test type | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
-{   ADD_UPDATE,  ITERATIONS,     1024,           1,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           2,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           4,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           8,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,          16,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           1,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           2,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           4,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           8,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,          16,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           1,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           2,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           4,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           8,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,          16,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           1,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           2,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           4,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,           8,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,     1024,          16,      64, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           1,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           2,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           4,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           8,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,          16,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           1,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           2,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           4,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           8,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,          16,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           1,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           2,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           4,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           8,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,          16,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           1,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           2,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           4,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           8,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,          16,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           1,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           2,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           4,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,           8,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,     1024,          16,      128, rte_hash_crc,   0},
 /* Small table, lookup */
 /*  Test type | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
-{       LOOKUP,  ITERATIONS,     1024,           1,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           2,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           4,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           8,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,          16,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           1,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           2,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           4,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           8,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,          16,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           1,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           2,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           4,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           8,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,          16,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           1,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           2,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           4,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,           8,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,     1024,          16,      64, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           1,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           2,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           4,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           8,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,          16,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           1,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           2,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           4,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           8,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,          16,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           1,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           2,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           4,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           8,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,          16,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           1,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           2,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           4,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           8,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,          16,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           1,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           2,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           4,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,           8,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,     1024,          16,      128, rte_hash_crc,   0},
 /* Big table, add */
 /* Test type  | Iterations | Entries | BucketSize | KeyLen |    HashFunc | InitVal */
-{ ADD_ON_EMPTY,     1048576,  1048576,           1,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           2,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           4,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           8,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,          16,      16, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           1,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           2,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           4,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           8,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,          16,      32, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           1,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           2,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           4,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           8,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,          16,      48, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           1,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           2,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           4,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,           8,      64, rte_hash_crc,   0},
-{ ADD_ON_EMPTY,     1048576,  1048576,          16,      64, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           1,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           2,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           4,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           8,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,          16,      16,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           1,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           2,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           4,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           8,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,          16,      32,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           1,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           2,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           4,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           8,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,          16,      48,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           1,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           2,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           4,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           8,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,          16,      64,  rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           1,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           2,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           4,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,           8,      128, rte_hash_crc,   0},
+{ ADD_ON_EMPTY,     1048576,  1048576,          16,      128, rte_hash_crc,   0},
 /* Big table, update */
 /* Test type  | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      16, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      32, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      48, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      64, rte_hash_crc,   0},
-{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      64, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      16,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      32,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      48,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      64,  rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           1,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           2,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           4,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,           8,      128, rte_hash_crc,   0},
+{   ADD_UPDATE,  ITERATIONS,  1048576,          16,      128, rte_hash_crc,   0},
 /* Big table, lookup */
 /* Test type  | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */
-{       LOOKUP,  ITERATIONS,  1048576,           1,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           2,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           4,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           8,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,          16,      16, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           1,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           2,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           4,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           8,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,          16,      32, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           1,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           2,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           4,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           8,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,          16,      48, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           1,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           2,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           4,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,           8,      64, rte_hash_crc,   0},
-{       LOOKUP,  ITERATIONS,  1048576,          16,      64, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           1,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           2,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           4,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           8,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,          16,      16,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           1,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           2,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           4,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           8,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,          16,      32,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           1,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           2,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           4,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           8,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,          16,      48,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           1,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           2,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           4,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           8,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,          16,      64,  rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           1,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           2,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           4,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,           8,      128, rte_hash_crc,   0},
+{       LOOKUP,  ITERATIONS,  1048576,          16,      128, rte_hash_crc,   0},
 };
 
 /******************************************************************************/
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..7f99ee1
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/* To include altivec.h, the GCC version must be >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
+/*
+ * Compare n bytes at dst and src. A GCC statement expression picks plain
+ * memcmp() when n is a compile-time constant (__builtin_constant_p) and
+ * rte_memcmp_func() otherwise.
+ */
+#define rte_memcmp(dst, src, n)              \
+	({ (__builtin_constant_p(n)) ?       \
+	memcmp((dst), (src), (n)) :          \
+	rte_memcmp_func((dst), (src), (n)); })
+
+/*
+ * Function form of rte_memcmp() used when the length is not a
+ * compile-time constant; defers to memcmp() and reports only whether the
+ * two regions differ (true) or match (false).
+ */
+static inline bool
+rte_memcmp_func(void *dst, const void *src, size_t n)
+{
+	const int diff = memcmp(dst, src, n);
+
+	return diff != 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..8ea34c0
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,479 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note On some architectures this is implemented as a macro, so its
+ * address should not be taken and care is needed as parameter expressions
+ * may be evaluated multiple times.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   false (0) if the two regions are equal, true (non-zero) if they differ.
+ */
+static inline bool
+rte_memcmp(const void *src_1, const void *src,
+		size_t n) __attribute__((always_inline));
+
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * AVX2 implementation below
+ */
+
+/**
+ * Test two 16-byte blocks for inequality with a single SSE compare.
+ * Both blocks are read with unaligned loads.
+ *
+ * @return
+ *   false if all 16 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2)
+{
+	const __m128i lhs = _mm_loadu_si128((const __m128i *)src_1);
+	const __m128i rhs = _mm_loadu_si128((const __m128i *)src_2);
+
+	/* Equal 16-bit lanes become all-ones; the mask has one bit per byte. */
+	const uint32_t eq_bits = _mm_movemask_epi8(_mm_cmpeq_epi16(lhs, rhs));
+
+	return eq_bits != 0xffffU;
+}
+
+/**
+ * Test two 32-byte blocks for inequality with a single AVX2 compare.
+ * Both blocks are read with unaligned loads.
+ *
+ * @param src_1
+ *   Pointer to the first 32 bytes.
+ * @param src_2
+ *   Pointer to the second 32 bytes.
+ * @return
+ *   false if all 32 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2)
+{
+	__m256i xmm0;
+	__m256i xmm1;
+	__m256i vcmp;
+	uint32_t vmask;
+
+	xmm0 = _mm256_loadu_si256((const __m256i *)src_1);
+	xmm1 = _mm256_loadu_si256((const __m256i *)src_2);
+
+	vcmp = _mm256_cmpeq_epi32(xmm0, xmm1);
+
+	/*
+	 * _mm256_movemask_epi8() returns a signed int with one bit per byte.
+	 * Keep it in exactly 32 bits: widening the all-equal value (-1) to
+	 * 64 bits would sign-extend it and never match 0xffffffff.
+	 */
+	vmask = (uint32_t)_mm256_movemask_epi8(vcmp);
+	return vmask != 0xffffffffU;
+}
+
+/**
+ * Test two 64-byte blocks for inequality as two 32-byte halves.
+ * The second half is only examined when the first half matches.
+ *
+ * @return
+ *   false if all 64 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2)
+{
+	if (likely(rte_cmp32(src_1, src_2) == 0))
+		return rte_cmp32(src_1 + 32, src_2 + 32);
+
+	return true;
+}
+
+/**
+ * Test two 128-byte blocks for inequality in four 32-byte steps,
+ * stopping at the first step that finds a difference.
+ *
+ * @return
+ *   false if all 128 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2)
+{
+	size_t offset;
+
+	for (offset = 0; offset < 128; offset += 32) {
+		if (likely(rte_cmp32(src_1 + offset, src_2 + offset) == 0))
+			continue;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Compare the final 0..15 bytes of two regions.
+ *
+ * Only the low four bits of n are used: each set bit selects one 1-, 2-,
+ * 4- or 8-byte piece, taken in that order from the start of the buffers.
+ * Every selected piece is examined; there is no early exit.
+ *
+ * @param _src_1
+ *   Pointer to the first region.
+ * @param _src_2
+ *   Pointer to the second region.
+ * @param n
+ *   Byte count; only n & 0x0f is significant.
+ * @return
+ *   false if the examined bytes match, true otherwise.
+ */
+static inline bool
+rte_memcmp_remainder(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	bool differ = false;
+
+	if (n & 0x01) {
+		if (*src_1 != *src_2)
+			differ = true;
+		src_1 += sizeof(uint8_t);
+		src_2 += sizeof(uint8_t);
+	}
+	if (n & 0x02) {
+		uint16_t v1, v2;
+
+		/* memcpy() keeps the possibly unaligned load well defined. */
+		memcpy(&v1, src_1, sizeof(v1));
+		memcpy(&v2, src_2, sizeof(v2));
+		if (v1 != v2)
+			differ = true;
+		src_1 += sizeof(v1);
+		src_2 += sizeof(v2);
+	}
+	if (n & 0x04) {
+		uint32_t v1, v2;
+
+		memcpy(&v1, src_1, sizeof(v1));
+		memcpy(&v2, src_2, sizeof(v2));
+		if (v1 != v2)
+			differ = true;
+		src_1 += sizeof(v1);
+		src_2 += sizeof(v2);
+	}
+	if (n & 0x08) {
+		uint64_t v1, v2;
+
+		memcpy(&v1, src_1, sizeof(v1));
+		memcpy(&v2, src_2, sizeof(v2));
+		if (v1 != v2)
+			differ = true;
+	}
+
+	return differ;
+}
+
+/**
+ * Compare two memory regions of n bytes for equality.
+ *
+ * Unlike memcmp() the result carries no ordering information: it only
+ * says whether the regions differ.
+ *
+ * @param _src_1
+ *   Pointer to the first region.
+ * @param _src_2
+ *   Pointer to the second region.
+ * @param n
+ *   Number of bytes to compare; any length is accepted.
+ * @return
+ *   false (0) if the regions are identical, true (non-zero) otherwise.
+ */
+static inline bool
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+
+	/* Short inputs are handled entirely by the scalar helper. */
+	if (n < 16)
+		return rte_memcmp_remainder(src_1, src_2, n);
+
+	/*
+	 * Walk forward over whole chunks, largest first. Each step compares
+	 * bytes that have not been looked at yet, so no byte is skipped and
+	 * nothing outside the n-byte regions is read.
+	 */
+	while (n >= 128) {
+		if (rte_cmp128(src_1, src_2))
+			return true;
+		src_1 += 128;
+		src_2 += 128;
+		n -= 128;
+	}
+
+	/* n < 128 from here on: bits 6, 5 and 4 select the 64/32/16 chunks. */
+	if (n & 0x40) {
+		if (rte_cmp64(src_1, src_2))
+			return true;
+		src_1 += 64;
+		src_2 += 64;
+	}
+
+	if (n & 0x20) {
+		if (rte_cmp32(src_1, src_2))
+			return true;
+		src_1 += 32;
+		src_2 += 32;
+	}
+
+	if (n & 0x10) {
+		if (rte_cmp16(src_1, src_2))
+			return true;
+		src_1 += 16;
+		src_2 += 16;
+	}
+
+	/* The helper only looks at the low four bits of n (0..15 bytes). */
+	return rte_memcmp_remainder(src_1, src_2, n);
+}
+
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * SSE & AVX implementation below
+ */
+
+/**
+ * Test two 16-byte blocks for inequality with a single SSE compare.
+ * Both blocks are read with unaligned loads.
+ *
+ * @return
+ *   false if all 16 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2)
+{
+	const __m128i lhs = _mm_loadu_si128((const __m128i *)src_1);
+	const __m128i rhs = _mm_loadu_si128((const __m128i *)src_2);
+
+	/* Equal 16-bit lanes become all-ones; the mask has one bit per byte. */
+	const uint32_t eq_bits = _mm_movemask_epi8(_mm_cmpeq_epi16(lhs, rhs));
+
+	return eq_bits != 0xffffU;
+}
+
+/**
+ * Test two 32-byte blocks for inequality as two 16-byte halves.
+ * The second half is only examined when the first half matches.
+ *
+ * @return
+ *   false if all 32 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2)
+{
+	if (likely(rte_cmp16(src_1, src_2) == 0))
+		return rte_cmp16(src_1 + 16, src_2 + 16);
+
+	return true;
+}
+
+/**
+ * Test two 64-byte blocks for inequality in four 16-byte steps,
+ * stopping at the first step that finds a difference.
+ *
+ * @return
+ *   false if all 64 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2)
+{
+	size_t offset;
+
+	for (offset = 0; offset < 64; offset += 16) {
+		if (likely(rte_cmp16(src_1 + offset, src_2 + offset) == 0))
+			continue;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Test two 128-byte blocks for inequality as two 64-byte halves.
+ * The second half is only examined when the first half matches.
+ *
+ * @return
+ *   false if all 128 bytes match, true otherwise.
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2)
+{
+	if (likely(rte_cmp64(src_1, src_2) == 0))
+		return rte_cmp64(src_1 + 64, src_2 + 64);
+
+	return true;
+}
+
+/**
+ * Compare the final 0..15 bytes of two regions.
+ *
+ * Only the low four bits of n are used: each set bit selects one 1-, 2-,
+ * 4- or 8-byte piece, taken in that order from the start of the buffers.
+ * Every selected piece is examined; there is no early exit.
+ *
+ * @param _src_1
+ *   Pointer to the first region.
+ * @param _src_2
+ *   Pointer to the second region.
+ * @param n
+ *   Byte count; only n & 0x0f is significant.
+ * @return
+ *   false if the examined bytes match, true otherwise.
+ */
+static inline bool
+rte_memcmp_remainder(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	bool differ = false;
+
+	if (n & 0x01) {
+		if (*src_1 != *src_2)
+			differ = true;
+		src_1 += sizeof(uint8_t);
+		src_2 += sizeof(uint8_t);
+	}
+	if (n & 0x02) {
+		uint16_t v1, v2;
+
+		/* memcpy() keeps the possibly unaligned load well defined. */
+		memcpy(&v1, src_1, sizeof(v1));
+		memcpy(&v2, src_2, sizeof(v2));
+		if (v1 != v2)
+			differ = true;
+		src_1 += sizeof(v1);
+		src_2 += sizeof(v2);
+	}
+	if (n & 0x04) {
+		uint32_t v1, v2;
+
+		memcpy(&v1, src_1, sizeof(v1));
+		memcpy(&v2, src_2, sizeof(v2));
+		if (v1 != v2)
+			differ = true;
+		src_1 += sizeof(v1);
+		src_2 += sizeof(v2);
+	}
+	if (n & 0x08) {
+		uint64_t v1, v2;
+
+		memcpy(&v1, src_1, sizeof(v1));
+		memcpy(&v2, src_2, sizeof(v2));
+		if (v1 != v2)
+			differ = true;
+	}
+
+	return differ;
+}
+
+/**
+ * Compare two memory regions of n bytes for equality.
+ *
+ * Unlike memcmp() the result carries no ordering information: it only
+ * says whether the regions differ.
+ *
+ * @param _src_1
+ *   Pointer to the first region.
+ * @param _src_2
+ *   Pointer to the second region.
+ * @param n
+ *   Number of bytes to compare; any length is accepted.
+ * @return
+ *   false (0) if the regions are identical, true (non-zero) otherwise.
+ */
+static inline bool
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+
+	/* Short inputs are handled entirely by the scalar helper. */
+	if (n < 16)
+		return rte_memcmp_remainder(src_1, src_2, n);
+
+	/*
+	 * Walk forward over whole chunks, largest first. Each step compares
+	 * bytes that have not been looked at yet, so no byte is skipped and
+	 * nothing outside the n-byte regions is read.
+	 */
+	while (n >= 128) {
+		if (rte_cmp128(src_1, src_2))
+			return true;
+		src_1 += 128;
+		src_2 += 128;
+		n -= 128;
+	}
+
+	/* n < 128 from here on: bits 6, 5 and 4 select the 64/32/16 chunks. */
+	if (n & 0x40) {
+		if (rte_cmp64(src_1, src_2))
+			return true;
+		src_1 += 64;
+		src_2 += 64;
+	}
+
+	if (n & 0x20) {
+		if (rte_cmp32(src_1, src_2))
+			return true;
+		src_1 += 32;
+		src_2 += 32;
+	}
+
+	if (n & 0x10) {
+		if (rte_cmp16(src_1, src_2))
+			return true;
+		src_1 += 16;
+		src_2 += 16;
+	}
+
+	/* The helper only looks at the low four bits of n (0..15 bytes). */
+	return rte_memcmp_remainder(src_1, src_2, n);
+}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..694c659
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
+/**
+ * Compare 16 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   Declared as bool; the x86 implementation returns false when the
+ *   16 bytes are equal and true when they differ.
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2);
+
+/**
+ * Compare 32 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   Declared as bool; the x86 implementation returns false when the
+ *   32 bytes are equal and true when they differ.
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2);
+
+/**
+ * Compare 64 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   Declared as bool; the x86 implementation returns false when the
+ *   64 bytes are equal and true when they differ.
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2);
+
+/**
+ * Compare 128 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   Declared as bool; the x86 implementation returns false when the
+ *   128 bytes are equal and true when they differ.
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This may be implemented as a macro, so its address should not be
+ * taken and care is needed as parameter expressions may be evaluated
+ * multiple times.
+ *
+ * @param dst
+ *   Pointer to the first source of the data.
+ * @param src
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   false (0) if the two regions match, true (non-zero) if they differ;
+ *   callers in librte_hash test the result with "== 0".
+ */
+static bool
+rte_memcmp(const void *dst, const void *src, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * Function form of the comparison, called by the rte_memcmp() macro on
+ * architectures that define rte_memcmp as a macro (the ppc_64 header
+ * implements it as a thin wrapper around memcmp()).
+ */
+static inline bool
+rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c
index 356ddfe..5e796c9 100644
--- a/lib/librte_hash/rte_fbk_hash.c
+++ b/lib/librte_hash/rte_fbk_hash.c
@@ -40,6 +40,7 @@
 #include <sys/queue.h>
 #include <rte_memory.h>
 #include <rte_memzone.h>
+#include <rte_memcmp.h>
 #include <rte_eal.h>
 #include <rte_eal_memconfig.h>
 #include <rte_malloc.h>
@@ -83,7 +84,8 @@ rte_fbk_hash_find_existing(const char *name)
 	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
 	TAILQ_FOREACH(te, fbk_hash_list, next) {
 		h = (struct rte_fbk_hash_table *) te->data;
-		if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0)
+		if (rte_memcmp(name, h->name,
+			RTE_MIN(strlen(name), strlen(h->name)) + 1) == 0)
 			break;
 	}
 	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
@@ -137,7 +139,8 @@ rte_fbk_hash_create(const struct rte_fbk_hash_params *params)
 	/* guarantee there's no existing */
 	TAILQ_FOREACH(te, fbk_hash_list, next) {
 		ht = (struct rte_fbk_hash_table *) te->data;
-		if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0)
+		if (rte_memcmp(params->name, ht->name,
+			RTE_MIN(strlen(params->name), strlen(ht->name)) + 1) == 0)
 			break;
 	}
 	if (te != NULL)
diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
index 9245716..bd14f2b 100644
--- a/lib/librte_hash/rte_hash.c
+++ b/lib/librte_hash/rte_hash.c
@@ -42,6 +42,7 @@
 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
 #include <rte_log.h>
 #include <rte_memcpy.h>
+#include <rte_memcmp.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 #include <rte_memzone.h>
@@ -153,7 +154,8 @@ rte_hash_find_existing(const char *name)
 	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
 	TAILQ_FOREACH(te, hash_list, next) {
 		h = (struct rte_hash *) te->data;
-		if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
+		if (rte_memcmp(name, h->name,
+				RTE_MIN(strlen(name), strlen(h->name)) + 1) == 0)
 			break;
 	}
 	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
@@ -213,7 +215,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	/* guarantee there's no existing */
 	TAILQ_FOREACH(te, hash_list, next) {
 		h = (struct rte_hash *) te->data;
-		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
+		if (rte_memcmp(params->name, h->name,
+			RTE_MIN(strlen(params->name), strlen(h->name)) + 1) == 0)
 			break;
 	}
 	if (te != NULL)
@@ -309,7 +312,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
 	/* Check if key is already present in the hash */
 	for (i = 0; i < h->bucket_entries; i++) {
 		if ((sig == sig_bucket[i]) &&
-		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
+		    likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i),
 				  h->key_len) == 0)) {
 			return bucket_index * h->bucket_entries + i;
 		}
@@ -359,7 +362,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
 	/* Check if key is already present in the hash */
 	for (i = 0; i < h->bucket_entries; i++) {
 		if ((sig == sig_bucket[i]) &&
-		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
+		    likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i),
 				  h->key_len) == 0)) {
 			sig_bucket[i] = NULL_SIGNATURE;
 			return bucket_index * h->bucket_entries + i;
@@ -401,7 +404,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
 	/* Check if key is already present in the hash */
 	for (i = 0; i < h->bucket_entries; i++) {
 		if ((sig == sig_bucket[i]) &&
-		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
+		    likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i),
 				  h->key_len) == 0)) {
 			return bucket_index * h->bucket_entries + i;
 		}
@@ -457,7 +460,7 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 
 		for (j = 0; j < h->bucket_entries; j++) {
 			if ((sigs[i] == sig_bucket[j]) &&
-			    likely(memcmp(keys[i],
+			    likely(rte_memcmp(keys[i],
 					  get_key_from_bucket(h, key_bucket, j),
 					  h->key_len) == 0)) {
 				positions[i] = bucket_index *
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 821a9d4..d335d0b 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -54,7 +54,7 @@ extern "C" {
 #define RTE_HASH_BUCKET_ENTRIES_MAX		16
 
 /** Maximum length of key that can be used. */
-#define RTE_HASH_KEY_LENGTH_MAX			64
+#define RTE_HASH_KEY_LENGTH_MAX			128
 
 /** Max number of keys that can be searched for using rte_hash_lookup_multi. */
 #define RTE_HASH_LOOKUP_BULK_MAX		16
-- 
1.9.1

  reply	other threads:[~2015-04-22 15:33 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-22 15:33 [dpdk-dev] [PATCH] Implement rte_memcmp with AVX/SSE instructions Ravi Kerur
2015-04-22 15:33 ` Ravi Kerur [this message]
2015-04-23  7:24   ` [dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructio Pawel Wodkowski
2015-04-23  8:11     ` Bruce Richardson
2015-04-23  8:21       ` Luke Gorrie
2015-04-23  9:23       ` Ananyev, Konstantin
2015-04-23 13:53         ` Ravi Kerur
2015-04-23 14:00           ` Bruce Richardson
2015-04-23 22:26             ` Ravi Kerur
2015-05-05 21:56               ` Ravi Kerur
2015-04-23 13:43     ` Ravi Kerur

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1429716828-19012-2-git-send-email-rkerur@gmail.com \
    --to=rkerur@gmail.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).