* [dpdk-dev] [PATCH v3] Implement memcmp using Intel SIMD intrinsics.
2015-05-18 20:01 [dpdk-dev] [PATCH v3] Implement memcmp using SIMD intrinsics Ravi Kerur
@ 2015-05-18 20:01 ` Ravi Kerur
2015-10-14 0:32 ` Stephen Hemminger
2016-01-28 3:08 ` [dpdk-dev] [dpdk-dev, " Zhihong Wang
2015-06-12 8:30 ` [dpdk-dev] [PATCH v3] Implement memcmp using SIMD intrinsics Ondřej Bílka
1 sibling, 2 replies; 10+ messages in thread
From: Ravi Kerur @ 2015-05-18 20:01 UTC (permalink / raw)
To: dev
This patch implements memcmp and use librte_hash as the first candidate
to use rte_memcmp which is implemented using AVX/SSE intrinsics.
Tested with GCC (4.8.2) and Clang (3.4-1) compilers, and both tests show
better performance for rte_memcmp when compared to memcmp, measured on an
Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz running Ubuntu 14.04 x86_64.
Changes in v3:
Implement complete memcmp functionality.
Implement functional and performance tests and add it to
"make test" infrastructure code.
Changes in v2:
Modified code to support only up to 64 bytes as that's the max bytes
used by hash for comparison.
Changes in v1:
Initial changes to support memcmp with support up to 128 bytes.
Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
app/test/Makefile | 5 +-
app/test/autotest_data.py | 19 +
app/test/test_hash_perf.c | 36 +-
app/test/test_memcmp.c | 229 ++++++
app/test/test_memcmp_perf.c | 339 ++++++++
.../common/include/arch/ppc_64/rte_memcmp.h | 62 ++
.../common/include/arch/x86/rte_memcmp.h | 900 +++++++++++++++++++++
lib/librte_eal/common/include/generic/rte_memcmp.h | 175 ++++
lib/librte_hash/rte_hash.c | 59 +-
9 files changed, 1789 insertions(+), 35 deletions(-)
create mode 100644 app/test/test_memcmp.c
create mode 100644 app/test/test_memcmp_perf.c
create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h
diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..957e4f1 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -81,6 +81,9 @@ SRCS-y += test_logs.c
SRCS-y += test_memcpy.c
SRCS-y += test_memcpy_perf.c
+SRCS-y += test_memcmp.c
+SRCS-y += test_memcmp_perf.c
+
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
@@ -150,7 +153,7 @@ CFLAGS_test_kni.o += -Wno-deprecated-declarations
endif
CFLAGS += -D_GNU_SOURCE
-# Disable VTA for memcpy test
+# Disable VTA for memcpy tests
ifeq ($(CC), gcc)
ifeq ($(shell test $(GCC_VERSION) -ge 44 && echo 1), 1)
CFLAGS_test_memcpy.o += -fno-var-tracking-assignments
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 618a946..e07f087 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -187,6 +187,12 @@ parallel_test_group_list = [
"Report" : None,
},
{
+ "Name" : "Memcmp autotest",
+ "Command" : "memcmp_autotest",
+ "Func" : default_autotest,
+ "Report" : None,
+ },
+ {
"Name" : "Memzone autotest",
"Command" : "memzone_autotest",
"Func" : default_autotest,
@@ -399,6 +405,19 @@ non_parallel_test_group_list = [
]
},
{
+ "Prefix": "memcmp_perf",
+ "Memory" : all_sockets(512),
+ "Tests" :
+ [
+ {
+ "Name" : "Memcmp performance autotest",
+ "Command" : "memcmp_perf_autotest",
+ "Func" : default_autotest,
+ "Report" : None,
+ },
+ ]
+},
+{
"Prefix": "hash_perf",
"Memory" : all_sockets(512),
"Tests" :
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..6887629 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -440,7 +440,7 @@ run_single_tbl_perf_test(const struct rte_hash *h, hash_operation func,
uint32_t *invalid_pos_count)
{
uint64_t begin, end, ticks = 0;
- uint8_t *key = NULL;
+ uint8_t * volatile key = NULL;
uint32_t *bucket_occupancies = NULL;
uint32_t num_buckets, i, j;
int32_t pos;
@@ -547,30 +547,30 @@ run_tbl_perf_test(struct tbl_perf_test_params *params)
case ADD_UPDATE:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
- params->num_iterations = num_iterations;
ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
params, &avg_occupancy, &invalid_pos);
+ params->num_iterations = num_iterations;
+ ticks += run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
break;
case DELETE:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
+ ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
params->num_iterations = num_iterations;
- ticks = run_single_tbl_perf_test(handle, rte_hash_del_key,
+ ticks += run_single_tbl_perf_test(handle, rte_hash_del_key,
params, &avg_occupancy, &invalid_pos);
break;
case LOOKUP:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
+ ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
params->num_iterations = num_iterations;
- ticks = run_single_tbl_perf_test(handle, rte_hash_lookup,
+ ticks += run_single_tbl_perf_test(handle, rte_hash_lookup,
params, &avg_occupancy, &invalid_pos);
break;
default: return -1;
@@ -623,10 +623,15 @@ static int run_all_tbl_perf_tests(void)
static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
uint32_t key_len)
{
- static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+ static uint8_t * volatile key;
uint64_t ticks = 0, start, end;
unsigned i, j;
+ key = rte_zmalloc("func hash key",
+ key_len * sizeof(uint8_t), 16);
+ if (key == NULL)
+ return;
+
for (i = 0; i < HASHTEST_ITERATIONS; i++) {
for (j = 0; j < key_len; j++)
@@ -638,8 +643,11 @@ static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
ticks += end - start;
}
- printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
- (unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+ rte_free(key);
+
+ printf("%-12s, %-18u, %-13u, %.02f\n",
+ get_hash_name(f), (unsigned) key_len, (unsigned) init_val,
+ (double)ticks / HASHTEST_ITERATIONS);
}
/*
@@ -687,7 +695,7 @@ fbk_hash_perf_test(void)
.socket_id = rte_socket_id(),
};
struct rte_fbk_hash_table *handle = NULL;
- uint32_t *keys = NULL;
+ uint32_t * volatile keys = NULL;
unsigned indexes[TEST_SIZE];
uint64_t lookup_time = 0;
unsigned added = 0;
diff --git a/app/test/test_memcmp.c b/app/test/test_memcmp.c
new file mode 100644
index 0000000..7d9c85f
--- /dev/null
+++ b/app/test/test_memcmp.c
@@ -0,0 +1,229 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_memcmp.h>
+
+#include "test.h"
+
+/*******************************************************************************
+ * Memcmp function performance test configuration section.
+ * Each functional test is run once for every size listed below.
+ *
+ * The array below controls what tests are performed. Every size
+ * from the array entries is tested.
+ */
+static size_t memcmp_sizes[] = {
+ 1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255,
+ 256, 257, 320, 384, 511, 512, 513, 1023, 1024, 1025, 1518, 1522, 1600,
+ 2048, 3072, 4096, 5120, 6144, 7168, 8192, 16384
+};
+
+/******************************************************************************/
+
+#define RTE_MEMCMP_LENGTH_MAX 16384
+
+/*
+ * Test a memcmp equal function.
+ */
+static int run_memcmp_eq_func_test(uint32_t len)
+{
+ uint32_t i, rc = 0;
+ uint8_t * volatile key = NULL;
+
+ key = rte_zmalloc("memcmp key", len * sizeof(uint8_t), 16);
+ if (key == NULL)
+ return -1;
+
+ for (i = 0; i < len; i++)
+ key[i] = (uint8_t) rte_rand();
+
+ rc = rte_memcmp(key, key, len);
+ rte_free(key);
+
+ return rc;
+}
+
+/*
+ * Test memcmp equal functions.
+ */
+static int run_memcmp_eq_func_tests(void)
+{
+ unsigned i;
+
+ for (i = 0;
+ i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+ i++) {
+ if (run_memcmp_eq_func_test(memcmp_sizes[i])) {
+ printf("Comparing equal %zd bytes failed\n", memcmp_sizes[i]);
+ return 1;
+ }
+ }
+ printf("RTE memcmp for equality successful\n");
+ return 0;
+}
+
+/*
+ * Test a memcmp less than function.
+ */
+static int run_memcmp_lt_func_test(uint32_t len)
+{
+ uint32_t i, rc;
+ uint8_t * volatile key_1 = NULL;
+ uint8_t * volatile key_2 = NULL;
+
+ key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+ if (key_1 == NULL)
+ return -1;
+
+ key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+ if (key_2 == NULL)
+ return -1;
+
+ for (i = 0; i < len; i++)
+ key_1[i] = i;
+
+ for (i = 0; i < len; i++)
+ key_2[i] = 2;
+
+ rc = rte_memcmp(key_1, key_2, len);
+ rte_free(key_1);
+ rte_free(key_2);
+
+ return rc;
+}
+
+/*
+ * Test memcmp less than functions.
+ */
+static int run_memcmp_lt_func_tests(void)
+{
+ unsigned i;
+
+ for (i = 0;
+ i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+ i++) {
+ if (!(run_memcmp_lt_func_test(memcmp_sizes[i]) < 0)) {
+ printf("Comparing less than for %zd bytes failed\n", memcmp_sizes[i]);
+ return 1;
+ }
+ }
+ printf("RTE memcmp for less than successful\n");
+ return 0;
+}
+
+/*
+ * Test a memcmp greater than function.
+ */
+static int run_memcmp_gt_func_test(uint32_t len)
+{
+ uint32_t i, rc;
+ uint8_t * volatile key_1 = NULL;
+ uint8_t * volatile key_2 = NULL;
+
+ key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+ if (key_1 == NULL)
+ return -1;
+
+ key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+ if (key_2 == NULL)
+ return -1;
+
+ for (i = 0; i < len; i++)
+ key_1[i] = 2;
+
+ for (i = 0; i < len; i++)
+ key_2[i] = i;
+
+ rc = rte_memcmp(key_1, key_2, len);
+ rte_free(key_1);
+ rte_free(key_2);
+
+ return rc;
+}
+
+/*
+ * Test memcmp greater than functions.
+ */
+static int run_memcmp_gt_func_tests(void)
+{
+ unsigned i;
+
+ for (i = 0;
+ i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+ i++) {
+ if (!(run_memcmp_gt_func_test(memcmp_sizes[i]) > 0)) {
+ printf("Comparing greater than for %zd bytes failed\n", memcmp_sizes[i]);
+ return 1;
+ }
+ }
+ printf("RTE memcmp for greater than successful\n");
+ return 0;
+}
+
+/*
+ * Do all unit and performance tests.
+ */
+static int
+test_memcmp(void)
+{
+ if (run_memcmp_eq_func_tests())
+ return -1;
+
+ if (run_memcmp_gt_func_tests())
+ return -1;
+
+ if (run_memcmp_lt_func_tests())
+ return -1;
+
+ return 0;
+}
+
+static struct test_command memcmp_cmd = {
+ .command = "memcmp_autotest",
+ .callback = test_memcmp,
+};
+REGISTER_TEST_COMMAND(memcmp_cmd);
diff --git a/app/test/test_memcmp_perf.c b/app/test/test_memcmp_perf.c
new file mode 100644
index 0000000..8b7a0c4
--- /dev/null
+++ b/app/test/test_memcmp_perf.c
@@ -0,0 +1,339 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <sys/times.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_memory.h>
+#include <rte_memcmp.h>
+
+#include "test.h"
+
+/*******************************************************************************
+ * Memcmp function performance test configuration section. Each performance test
+ * will be performed MEMCMP_ITERATIONS times.
+ *
+ * The two arrays below control what tests are performed. Every size
+ * from the array entries is tested.
+ */
+#define MEMCMP_ITERATIONS 500 * 500 * 500
+
+static size_t memcmp_sizes[] = {
+ 2, 5, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128,
+ 129, 191, 192, 193, 255, 256, 257, 319, 320, 321, 383, 384, 385, 447, 448,
+ 449, 511, 512, 513, 767, 768, 769, 1023, 1024, 1025, 1522, 1536, 1600,
+ 2048, 2560, 3072, 3584, 4096, 4608, 5632, 6144, 6656, 7168, 7680, 8192,
+ 16834
+};
+
+static size_t memcmp_lt_gt_sizes[] = {
+ 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192};
+
+/******************************************************************************/
+
+static int
+run_single_memcmp_eq_perf_test(uint32_t len, int func_type, uint64_t iterations)
+{
+ double begin = 0, end = 0;
+ uint64_t i, j, rc = 0;
+ uint8_t * volatile key = NULL;
+
+ key = rte_zmalloc("memcmp key", len * sizeof(uint8_t), 16);
+ if (key == NULL)
+ return -1;
+
+ /* Prepare inputs for the current iteration */
+ for (j = 0; j < len; j++)
+ key[j] = j / 64;
+
+ begin = rte_rdtsc();
+
+ /* Perform operation, and measure time it takes */
+ for (i = 0; i < iterations; i++) {
+
+ if (func_type == 1)
+ rc += rte_memcmp(key, key, len);
+ else
+ rc += memcmp(key, key, len);
+ }
+
+ end = rte_rdtsc() - begin;
+
+ printf(" *** %10i, %10.4f ***\n", len, (double)(end/iterations));
+
+ rte_free(key);
+
+ return rc;
+}
+
+/*
+ * Run all memcmp table performance tests.
+ */
+static int run_all_memcmp_eq_perf_tests(void)
+{
+ unsigned i;
+
+ printf(" *** RTE memcmp equal performance test results ***\n");
+ printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+ /* Loop through every combination of test parameters */
+ for (i = 0;
+ i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+ i++) {
+ /* Perform test */
+ if (run_single_memcmp_eq_perf_test(memcmp_sizes[i], 1,
+ MEMCMP_ITERATIONS) != 0)
+ return -1;
+ }
+
+ printf(" *** memcmp equal performance test results ***\n");
+ printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+ /* Loop through every combination of test parameters */
+ for (i = 0;
+ i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+ i++) {
+ /* Perform test */
+ if (run_single_memcmp_eq_perf_test(memcmp_sizes[i], 2,
+ MEMCMP_ITERATIONS) != 0)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+run_single_memcmp_lt_perf_test(uint32_t len, int func_type,
+ uint64_t iterations)
+{
+ double begin = 0, end = 0;
+ uint64_t i, j;
+ uint8_t * volatile key_1 = NULL;
+ uint8_t * volatile key_2 = NULL;
+
+ key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+ if (key_1 == NULL)
+ return -1;
+
+ key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+ if (key_2 == NULL) {
+ rte_free(key_1);
+ return -1;
+ }
+
+ /* Prepare inputs for the current iteration */
+ for (j = 0; j < len; j++)
+ key_1[j] = 1;
+
+ for (j = 0; j < len; j++)
+ key_2[j] = 1;
+
+ key_2[len / 2] = 2;
+
+ begin = rte_rdtsc();
+
+ /* Perform operation, and measure time it takes */
+ for (i = 0; i < iterations; i++) {
+
+ if (func_type == 1) {
+ if (!(rte_memcmp(key_1, key_2, len) < 0))
+ return -1;
+ } else {
+ if (!(memcmp(key_1, key_2, len) < 0))
+ return -1;
+ }
+ }
+
+ end = rte_rdtsc() - begin;
+
+ printf(" *** %10i, %10.4f ***\n", len, (double)(end/iterations));
+
+ rte_free(key_1);
+ rte_free(key_2);
+
+ return 0;
+}
+
+/*
+ * Run all memcmp table performance tests.
+ */
+static int run_all_memcmp_lt_perf_tests(void)
+{
+ unsigned i;
+
+ printf(" *** RTE memcmp less than performance test results ***\n");
+ printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+ /* Loop through every combination of test parameters */
+ for (i = 0;
+ i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]);
+ i++) {
+ /* Perform test */
+ if (run_single_memcmp_lt_perf_test(memcmp_lt_gt_sizes[i], 1,
+ MEMCMP_ITERATIONS) != 0)
+ return -1;
+ }
+
+ printf(" *** memcmp less than performance test results ***\n");
+ printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+ /* Loop through every combination of test parameters */
+ for (i = 0;
+ i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]);
+ i++) {
+ /* Perform test */
+ if (run_single_memcmp_lt_perf_test(memcmp_lt_gt_sizes[i], 2,
+ MEMCMP_ITERATIONS) != 0)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+run_single_memcmp_gt_perf_test(uint32_t len, int func_type,
+ uint64_t iterations)
+{
+ double begin = 0, end = 0;
+ uint64_t i, j;
+ uint8_t * volatile key_1 = NULL;
+ uint8_t * volatile key_2 = NULL;
+
+ key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+ if (key_1 == NULL)
+ return -1;
+
+ key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+ if (key_2 == NULL) {
+ rte_free(key_1);
+ return -1;
+ }
+
+ /* Prepare inputs for the current iteration */
+ for (j = 0; j < len; j++)
+ key_1[j] = 1;
+ key_1[len / 2] = 2;
+
+ for (j = 0; j < len; j++)
+ key_2[j] = 1;
+
+ begin = rte_rdtsc();
+
+ /* Perform operation, and measure time it takes */
+ for (i = 0; i < iterations; i++) {
+
+ if (func_type == 1) {
+ if (!(rte_memcmp(key_1, key_2, len) > 0))
+ return -1;
+ } else {
+ if (!(memcmp(key_1, key_2, len) > 0))
+ return -1;
+ }
+ }
+
+ end = rte_rdtsc() - begin;
+
+ printf(" *** %10i, %10.4f ***\n", len, (double)(end/iterations));
+
+ rte_free(key_1);
+ rte_free(key_2);
+
+ return 0;
+}
+
+/*
+ * Run all memcmp table performance tests.
+ */
+static int run_all_memcmp_gt_perf_tests(void)
+{
+ unsigned i;
+
+ printf(" *** RTE memcmp greater than performance test results ***\n");
+ printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+ /* Loop through every combination of test parameters */
+ for (i = 0;
+ i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]);
+ i++) {
+ /* Perform test */
+ if (run_single_memcmp_gt_perf_test(memcmp_lt_gt_sizes[i], 1,
+ MEMCMP_ITERATIONS) != 0)
+ return -1;
+ }
+
+ printf(" *** memcmp greater than performance test results ***\n");
+ printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+ /* Loop through every combination of test parameters */
+ for (i = 0;
+ i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]);
+ i++) {
+ /* Perform test */
+ if (run_single_memcmp_gt_perf_test(memcmp_lt_gt_sizes[i], 2,
+ MEMCMP_ITERATIONS) != 0)
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Do all performance tests.
+ */
+static int
+test_memcmp_perf(void)
+{
+ if (run_all_memcmp_eq_perf_tests() != 0)
+ return -1;
+
+ if (run_all_memcmp_lt_perf_tests() != 0)
+ return -1;
+
+ if (run_all_memcmp_gt_perf_tests() != 0)
+ return -1;
+
+ return 0;
+}
+
+static struct test_command memcmp_perf_cmd = {
+ .command = "memcmp_perf_autotest",
+ .callback = test_memcmp_perf,
+};
+REGISTER_TEST_COMMAND(memcmp_perf_cmd);
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..6e54f3b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/*To include altivec.h, GCC version must >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
+#define rte_memcmp(dst, src, n) \
+ ({ (__builtin_constant_p(n)) ? \
+ memcmp((dst), (src), (n)) : \
+ rte_memcmp_func((dst), (src), (n)); })
+
+static inline bool
+rte_memcmp_func(void *dst, const void *src, size_t n)
+{
+ return memcmp(dst, src, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..085dfb2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,900 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_memcmp(const void *src_1, const void *src,
+ size_t n) __attribute__((always_inline));
+
+/**
+ * Find the first different bit for comparison.
+ */
+static inline int
+rte_cmpffd (uint32_t x, uint32_t y)
+{
+ int i;
+ int pos = x ^ y;
+ for (i = 0; i < 32; i++)
+ if (pos & (1<<i))
+ return i;
+ return -1;
+}
+
+/**
+ * Find the first different byte for comparison.
+ */
+static inline int
+rte_cmpffdb (const uint8_t *x, const uint8_t *y, size_t n)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ if (x[i] != y[i])
+ return x[i] - y[i];
+ return 0;
+}
+
+/**
+ * Compare 16 bytes between two locations.
+ * locations should not overlap.
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2)
+{
+ __m128i xmm0, xmm1, xmm2;
+
+ xmm0 = _mm_lddqu_si128((const __m128i *)src_1);
+ xmm1 = _mm_lddqu_si128((const __m128i *)src_2);
+ xmm2 = _mm_xor_si128(xmm0, xmm1);
+
+ if (unlikely(!_mm_testz_si128(xmm2, xmm2))) {
+
+ uint64_t mm11 = _mm_extract_epi64(xmm0, 0);
+ uint64_t mm12 = _mm_extract_epi64(xmm0, 1);
+
+ uint64_t mm21 = _mm_extract_epi64(xmm1, 0);
+ uint64_t mm22 = _mm_extract_epi64(xmm1, 1);
+
+ if (mm11 == mm21)
+ return rte_cmpffdb((const uint8_t *)&mm12,
+ (const uint8_t *)&mm22, 8);
+ else
+ return rte_cmpffdb((const uint8_t *)&mm11,
+ (const uint8_t *)&mm21, 8);
+ }
+
+ return 0;
+}
+
+/**
+ * Compare 0 to 15 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_memcmp_regular(const uint8_t *src_1u, const uint8_t *src_2u, size_t n)
+{
+ int ret = 1;
+
+ /**
+ * Compare less than 16 bytes
+ */
+ if (n & 0x08) {
+ ret = (*(const uint64_t *)src_1u ==
+ *(const uint64_t *)src_2u);
+
+ if ((ret != 1))
+ goto exit_8;
+
+ n -= 0x8;
+ src_1u += 0x8;
+ src_2u += 0x8;
+ }
+
+ if (n & 0x04) {
+ ret = (*(const uint32_t *)src_1u ==
+ *(const uint32_t *)src_2u);
+
+ if ((ret != 1))
+ goto exit_4;
+
+ n -= 0x4;
+ src_1u += 0x4;
+ src_2u += 0x4;
+ }
+
+ if (n & 0x02) {
+ ret = (*(const uint16_t *)src_1u ==
+ *(const uint16_t *)src_2u);
+
+ if ((ret != 1))
+ goto exit_2;
+
+ n -= 0x2;
+ src_1u += 0x2;
+ src_2u += 0x2;
+ }
+
+ if (n & 0x01) {
+ ret = (*(const uint8_t *)src_1u ==
+ *(const uint8_t *)src_2u);
+
+ if ((ret != 1))
+ goto exit_1;
+
+ n -= 0x1;
+ src_1u += 0x1;
+ src_2u += 0x1;
+ }
+
+ return !ret;
+
+exit_8:
+ return rte_cmpffdb(src_1u, src_2u, 8);
+exit_4:
+ return rte_cmpffdb(src_1u, src_2u, 4);
+exit_2:
+ return rte_cmpffdb(src_1u, src_2u, 2);
+exit_1:
+ return rte_cmpffdb(src_1u, src_2u, 1);
+}
+
+/**
+ * AVX2 implementation below
+ */
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+ const __m128i* src1 = (const __m128i*)src_1;
+ const __m128i* src2 = (const __m128i*)src_2;
+ const uint8_t *s1, *s2;
+
+ __m128i mm11 = _mm_lddqu_si128(src1);
+ __m128i mm12 = _mm_lddqu_si128(src1 + 1);
+ __m128i mm21 = _mm_lddqu_si128(src2);
+ __m128i mm22 = _mm_lddqu_si128(src2 + 1);
+
+ __m128i mm1 = _mm_xor_si128(mm11, mm21);
+ __m128i mm2 = _mm_xor_si128(mm12, mm22);
+ __m128i mm = _mm_or_si128(mm1, mm2);
+
+ if (unlikely(!_mm_testz_si128(mm, mm))) {
+
+ /*
+ * Find out which of the two 16-byte blocks
+ * are different.
+ */
+ if (_mm_testz_si128(mm1, mm1)) {
+ mm11 = mm12;
+ mm21 = mm22;
+ mm1 = mm2;
+ s1 = (const uint8_t *)(src1 + 1);
+ s2 = (const uint8_t *)(src2 + 1);
+ } else {
+ s1 = (const uint8_t *)src1;
+ s2 = (const uint8_t *)src2;
+ }
+
+ // Produce the comparison result
+ __m128i mm_cmp = _mm_cmpgt_epi8(mm11, mm21);
+ __m128i mm_rcmp = _mm_cmpgt_epi8(mm21, mm11);
+ mm_cmp = _mm_xor_si128(mm1, mm_cmp);
+ mm_rcmp = _mm_xor_si128(mm1, mm_rcmp);
+
+ uint32_t cmp = _mm_movemask_epi8(mm_cmp);
+ uint32_t rcmp = _mm_movemask_epi8(mm_rcmp);
+
+ int cmp_b = rte_cmpffd(cmp, rcmp);
+
+ int ret = (cmp_b == -1) ? 0 : (s1[cmp_b] - s2[cmp_b]);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2)
+{
+ int ret;
+
+ ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32,
+ (const uint8_t *)src_2 + 0 * 32);
+
+ if (unlikely(ret != 0))
+ return ret;
+
+ ret = rte_cmp16((const uint8_t *)src_1 + 1 * 32,
+ (const uint8_t *)src_2 + 1 * 32);
+ return ret;
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 64 bytes between two locations with AVX2.
 * Locations should not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 * @return
 *   zero if src_1 equal src_2
 *   -ve if src_1 less than src_2
 *   +ve if src_1 greater than src_2
 */
static inline int
rte_cmp64(const void *src_1, const void *src_2)
{
	const __m256i *src1 = (const __m256i *)src_1;
	const __m256i *src2 = (const __m256i *)src_2;
	const uint8_t *s1, *s2;

	__m256i mm11 = _mm256_lddqu_si256(src1);
	__m256i mm12 = _mm256_lddqu_si256(src1 + 1);
	__m256i mm21 = _mm256_lddqu_si256(src2);
	__m256i mm22 = _mm256_lddqu_si256(src2 + 1);

	/* Byte-wise difference masks for each 32-byte half. */
	__m256i mm1 = _mm256_xor_si256(mm11, mm21);
	__m256i mm2 = _mm256_xor_si256(mm12, mm22);
	__m256i mm = _mm256_or_si256(mm1, mm2);

	if (unlikely(!_mm256_testz_si256(mm, mm))) {
		/*
		 * Find out which of the two 32-byte blocks
		 * are different.
		 */
		if (_mm256_testz_si256(mm1, mm1)) {
			mm11 = mm12;
			mm21 = mm22;
			mm1 = mm2;
			s1 = (const uint8_t *)(src1 + 1);
			s2 = (const uint8_t *)(src2 + 1);
		} else {
			s1 = (const uint8_t *)src1;
			s2 = (const uint8_t *)src2;
		}

		/* Produce the comparison result */
		__m256i mm_cmp = _mm256_cmpgt_epi8(mm11, mm21);
		__m256i mm_rcmp = _mm256_cmpgt_epi8(mm21, mm11);
		mm_cmp = _mm256_xor_si256(mm1, mm_cmp);
		mm_rcmp = _mm256_xor_si256(mm1, mm_rcmp);

		uint32_t cmp = _mm256_movemask_epi8(mm_cmp);
		uint32_t rcmp = _mm256_movemask_epi8(mm_rcmp);

		/* Index of first differing byte, -1 when masks agree. */
		int cmp_b = rte_cmpffd(cmp, rcmp);

		int ret = (cmp_b == -1) ? 0 : (s1[cmp_b] - s2[cmp_b]);
		return ret;
	}

	return 0;
}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 128 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp128(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;

	/* Two 64-byte halves; stop at the first difference. */
	int ret = rte_cmp64(s1, s2);

	if (likely(ret == 0))
		ret = rte_cmp64(s1 + 64, s2 + 64);

	return ret;
}
+
+/**
+ * Compare 256 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 256 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp256(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	size_t off;
	int ret = 0;

	/* Four consecutive 64-byte chunks, stopping at the first
	 * difference; comparison order matches a plain memcmp().
	 */
	for (off = 0; off < 256; off += 64) {
		ret = rte_cmp64(s1 + off, s2 + off);
		if (unlikely(ret != 0))
			break;
	}

	return ret;
}
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
/**
 * Compare bytes between two locations. The locations must not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 * @param n
 *   Number of bytes to compare.
 * @return
 *   zero if src_1 equal src_2
 *   -ve if src_1 less than src_2
 *   +ve if src_1 greater than src_2
 */
static inline int
rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
{
	const uint8_t *src_1 = (const uint8_t *)_src_1;
	const uint8_t *src_2 = (const uint8_t *)_src_2;
	int ret = 0;

	if (n < 16)
		return rte_memcmp_regular(src_1, src_2, n);

	/*
	 * Small sizes are handled with fixed-width compares plus one
	 * overlapping compare of the final 16/32 bytes, so every byte
	 * of [0, n) is covered without a scalar tail loop.
	 */
	if (n <= 32) {
		ret = rte_cmp16(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 48) {
		ret = rte_cmp32(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 64) {
		ret = rte_cmp32(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		ret = rte_cmp16(src_1 + 32, src_2 + 32);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 96) {
		ret = rte_cmp64(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		ret = rte_cmp16(src_1 + 64, src_2 + 64);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 128) {
		ret = rte_cmp64(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		ret = rte_cmp32(src_1 + 64, src_2 + 64);
		if (unlikely(ret != 0))
			return ret;
		ret = rte_cmp16(src_1 + 96, src_2 + 96);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	/* Consume whole 512-byte chunks first. */
	while (n > 512) {
		ret = rte_cmp256(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;

		ret = rte_cmp256(src_1 + 256, src_2 + 256);
		if (unlikely(ret != 0))
			return ret;

		src_1 += 512;
		src_2 += 512;
		n -= 512;
	}

	/* Remainder: at most 512 bytes left at this point. */
	if (n >= 256) {
		ret = rte_cmp256(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		src_1 += 256;
		src_2 += 256;
		n -= 256;
	}

	if (n >= 128) {
		ret = rte_cmp128(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		src_1 += 128;
		src_2 += 128;
		n -= 128;
	}

	if (n >= 64) {
		ret = rte_cmp64(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		src_1 += 64;
		src_2 += 64;
		n -= 64;
	}

	if (n > 32) {
		ret = rte_cmp32(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp32(src_1 + n - 32, src_2 + n - 32);
	}

	/* Final overlapping 32-byte compare covers any leftover bytes. */
	if (n > 0)
		ret = rte_cmp32(src_1 + n - 32, src_2 + n - 32);

	return ret;
}
+
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 32 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp32(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;

	/* Two 16-byte halves; stop at the first difference. */
	int ret = rte_cmp16(s1, s2);

	if (likely(ret == 0))
		ret = rte_cmp16(s1 + 16, s2 + 16);

	return ret;
}
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 48 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp48(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	size_t off;
	int ret = 0;

	/* Three consecutive 16-byte compares, first difference wins. */
	for (off = 0; off < 48; off += 16) {
		ret = rte_cmp16(s1 + off, s2 + off);
		if (unlikely(ret != 0))
			break;
	}

	return ret;
}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 64 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp64(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	size_t off;
	int ret = 0;

	/* Four consecutive 16-byte compares, first difference wins. */
	for (off = 0; off < 64; off += 16) {
		ret = rte_cmp16(s1 + off, s2 + off);
		if (unlikely(ret != 0))
			break;
	}

	return ret;
}
+
+/**
+ * Compare 128 bytes or its multiple between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 128 bytes or its multiple between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp128(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	size_t off;
	int ret = 0;

	/* Eight consecutive 16-byte compares, first difference wins. */
	for (off = 0; off < 128; off += 16) {
		ret = rte_cmp16(s1 + off, s2 + off);
		if (unlikely(ret != 0))
			break;
	}

	return ret;
}
+
+/**
+ * Compare 256 bytes between two locations.
+ * Locations should not overlap.
+ */
/**
 * Compare 256 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp256(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	size_t off;
	int ret = 0;

	/*
	 * Sixteen consecutive 16-byte compares, scanning in memory
	 * order and stopping at the first differing chunk — identical
	 * comparison sequence to the fully unrolled form.
	 */
	for (off = 0; off < 256; off += 16) {
		ret = rte_cmp16(s1 + off, s2 + off);
		if (unlikely(ret != 0))
			break;
	}

	return ret;
}
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
/**
 * Compare bytes between two locations. The locations must not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 * @param n
 *   Number of bytes to compare.
 * @return
 *   zero if src_1 equal src_2
 *   -ve if src_1 less than src_2
 *   +ve if src_1 greater than src_2
 */
static inline int
rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
{
	const uint8_t *src_1 = (const uint8_t *)_src_1;
	const uint8_t *src_2 = (const uint8_t *)_src_2;
	int ret = 0;

	if (n < 16)
		return rte_memcmp_regular(src_1, src_2, n);

	/*
	 * Small sizes: fixed-width compares plus one overlapping
	 * compare of the final 16 bytes, covering all of [0, n).
	 */
	if (n <= 32) {
		ret = rte_cmp16(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 48) {
		ret = rte_cmp32(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 64) {
		ret = rte_cmp32(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		ret = rte_cmp16(src_1 + 32, src_2 + 32);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	if (n <= 96) {
		ret = rte_cmp64(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		ret = rte_cmp16(src_1 + 64, src_2 + 64);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	/*
	 * Large sizes: consume 256-byte chunks, then fall through a
	 * descending chain of fixed-width compares. This replaces the
	 * original goto-based flow with identical comparison coverage.
	 */
	for (; n >= 256; n -= 256) {
		ret = rte_cmp256(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;

		src_1 += 256;
		src_2 += 256;
	}

	if (n >= 128) {
		ret = rte_cmp128(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		src_1 += 128;
		src_2 += 128;
		n -= 128;
	}

	if (n >= 64) {
		ret = rte_cmp64(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		src_1 += 64;
		src_2 += 64;
		n -= 64;
	}

	if (n >= 32) {
		ret = rte_cmp32(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		src_1 += 32;
		src_2 += 32;
		n -= 32;
	}

	if (n > 16) {
		ret = rte_cmp16(src_1, src_2);
		if (unlikely(ret != 0))
			return ret;
		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
	}

	/* Final overlapping 16-byte compare covers any leftover bytes. */
	if (n > 0)
		ret = rte_cmp16(src_1 + n - 16, src_2 + n - 16);

	return ret;
}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..5e68036
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,175 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
+/**
+ * Find the first different bit for comparison.
+ */
+static inline int
+rte_cmpffd (uint32_t x, uint32_t y);
+
+/**
+ * Find the first different byte for comparison.
+ */
+static inline int
+rte_cmpffdb (const uint8_t *x, const uint8_t *y, size_t n);
+
+/**
+ * Compare 16 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2);
+
+/**
+ * Compare 32 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2);
+
+/**
+ * Compare 64 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2);
+
+/**
+ * Compare 48 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2);
+
+/**
+ * Compare 128 bytes between two locations using
+ * optimised instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2);
+
+/**
+ * Compare 256 bytes or greater between two locations using
+ * optimised instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so its address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static int
+rte_memcmp(const void *dst, const void *src, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcmp() function used by rte_memcmp macro
+ */
+static inline int
+rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
index 9245716..075da62 100644
--- a/lib/librte_hash/rte_hash.c
+++ b/lib/librte_hash/rte_hash.c
@@ -42,6 +42,7 @@
#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
#include <rte_log.h>
#include <rte_memcpy.h>
+#include <rte_memcmp.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_memzone.h>
@@ -299,6 +300,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
int32_t pos;
+ const void * volatile key_1 = key;
/* Get the hash signature and bucket index */
sig |= h->sig_msb;
@@ -308,10 +310,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+ return bucket_index * h->bucket_entries + i;
}
}
@@ -350,6 +355,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
+ const void * volatile key_1 = key;
+
/* Get the hash signature and bucket index */
sig = sig | h->sig_msb;
bucket_index = sig & h->bucket_bitmask;
@@ -358,11 +365,14 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- sig_bucket[i] = NULL_SIGNATURE;
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) {
+ sig_bucket[i] = NULL_SIGNATURE;
+ return bucket_index * h->bucket_entries + i;
+ }
}
}
@@ -392,6 +402,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
+ const void * volatile key_1 = key;
+
/* Get the hash signature and bucket index */
sig |= h->sig_msb;
bucket_index = sig & h->bucket_bitmask;
@@ -400,10 +412,13 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+ return bucket_index * h->bucket_entries + i;
}
}
@@ -456,13 +471,17 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
positions[i] = -ENOENT;
for (j = 0; j < h->bucket_entries; j++) {
- if ((sigs[i] == sig_bucket[j]) &&
- likely(memcmp(keys[i],
- get_key_from_bucket(h, key_bucket, j),
- h->key_len) == 0)) {
- positions[i] = bucket_index *
- h->bucket_entries + j;
- break;
+ if (sigs[i] == sig_bucket[j]) {
+
+ const void * volatile key_1 = keys[i];
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, j);
+ if (likely(rte_memcmp(key_1, key_2,
+ h->key_len) == 0)) {
+ positions[i] = bucket_index *
+ h->bucket_entries + j;
+ break;
+ }
}
}
}
--
1.9.1
^ permalink raw reply [flat|nested] 10+ messages in thread