From: Ravi Kerur
To: dev@dpdk.org
Date: Mon, 18 May 2015 13:01:43 -0700
Message-Id: <1431979303-1346-2-git-send-email-rkerur@gmail.com>
In-Reply-To: <1431979303-1346-1-git-send-email-rkerur@gmail.com>
References: <1431979303-1346-1-git-send-email-rkerur@gmail.com>
Subject: [dpdk-dev] [PATCH v3] Implement memcmp using Intel SIMD intrinsics.

This patch implements rte_memcmp() using Intel AVX/SSE intrinsics and makes
librte_hash the first library to use it. Tested with the GCC (4.8.2) and
Clang (3.4-1) compilers; with both, the new implementation outperforms memcmp
on an Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz running Ubuntu 14.04 x86_64.

Changes in v3:
Implement complete memcmp functionality.
Implement functional and performance tests and add them to the "make test"
infrastructure.

Changes in v2:
Modify the code to support only up to 64 bytes, as that is the maximum key
size the hash library uses for comparison.

Changes in v1:
Initial changes to support memcmp for up to 128 bytes.
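For reference, rte_memcmp() is meant to be called exactly like memcmp(); a
minimal usage sketch follows (the helper name and its arguments are
illustrative only, not part of this patch):

	#include <stddef.h>
	#include <rte_memcmp.h>

	/* Returns non-zero when the two keys of length 'len' are identical.
	 * rte_memcmp() keeps memcmp()'s zero / negative / positive contract. */
	static int
	keys_equal(const void *key_1, const void *key_2, size_t len)
	{
		return rte_memcmp(key_1, key_2, len) == 0;
	}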
Signed-off-by: Ravi Kerur --- app/test/Makefile | 5 +- app/test/autotest_data.py | 19 + app/test/test_hash_perf.c | 36 +- app/test/test_memcmp.c | 229 ++++++ app/test/test_memcmp_perf.c | 339 ++++++++ .../common/include/arch/ppc_64/rte_memcmp.h | 62 ++ .../common/include/arch/x86/rte_memcmp.h | 900 +++++++++++++++++++++ lib/librte_eal/common/include/generic/rte_memcmp.h | 175 ++++ lib/librte_hash/rte_hash.c | 59 +- 9 files changed, 1789 insertions(+), 35 deletions(-) create mode 100644 app/test/test_memcmp.c create mode 100644 app/test/test_memcmp_perf.c create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h diff --git a/app/test/Makefile b/app/test/Makefile index 4aca77c..957e4f1 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -81,6 +81,9 @@ SRCS-y += test_logs.c SRCS-y += test_memcpy.c SRCS-y += test_memcpy_perf.c +SRCS-y += test_memcmp.c +SRCS-y += test_memcmp_perf.c + SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c @@ -150,7 +153,7 @@ CFLAGS_test_kni.o += -Wno-deprecated-declarations endif CFLAGS += -D_GNU_SOURCE -# Disable VTA for memcpy test +# Disable VTA for memcpy tests ifeq ($(CC), gcc) ifeq ($(shell test $(GCC_VERSION) -ge 44 && echo 1), 1) CFLAGS_test_memcpy.o += -fno-var-tracking-assignments diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py index 618a946..e07f087 100644 --- a/app/test/autotest_data.py +++ b/app/test/autotest_data.py @@ -187,6 +187,12 @@ parallel_test_group_list = [ "Report" : None, }, { + "Name" : "Memcmp autotest", + "Command" : "memcmp_autotest", + "Func" : default_autotest, + "Report" : None, + }, + { "Name" : "Memzone autotest", "Command" : "memzone_autotest", "Func" : default_autotest, @@ -399,6 +405,19 @@ non_parallel_test_group_list = [ ] }, { + "Prefix": "memcmp_perf", + "Memory" : all_sockets(512), + "Tests" : + [ + { + "Name" : "Memcmp performance autotest", + "Command" : "memcmp_perf_autotest", + "Func" : default_autotest, + "Report" : None, + }, + ] +}, +{ "Prefix": "hash_perf", "Memory" : all_sockets(512), "Tests" : diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c index 6eabb21..6887629 100644 --- a/app/test/test_hash_perf.c +++ b/app/test/test_hash_perf.c @@ -440,7 +440,7 @@ run_single_tbl_perf_test(const struct rte_hash *h, hash_operation func, uint32_t *invalid_pos_count) { uint64_t begin, end, ticks = 0; - uint8_t *key = NULL; + uint8_t * volatile key = NULL; uint32_t *bucket_occupancies = NULL; uint32_t num_buckets, i, j; int32_t pos; @@ -547,30 +547,30 @@ run_tbl_perf_test(struct tbl_perf_test_params *params) case ADD_UPDATE: num_iterations = params->num_iterations; params->num_iterations = params->entries; - run_single_tbl_perf_test(handle, rte_hash_add_key, params, - &avg_occupancy, &invalid_pos); - params->num_iterations = num_iterations; ticks = run_single_tbl_perf_test(handle, rte_hash_add_key, params, &avg_occupancy, &invalid_pos); + params->num_iterations = num_iterations; + ticks += run_single_tbl_perf_test(handle, rte_hash_add_key, + params, &avg_occupancy, &invalid_pos); break; case DELETE: num_iterations = params->num_iterations; params->num_iterations = params->entries; - run_single_tbl_perf_test(handle, rte_hash_add_key, params, - &avg_occupancy, &invalid_pos); + ticks = run_single_tbl_perf_test(handle, rte_hash_add_key, + params, &avg_occupancy, &invalid_pos); 
params->num_iterations = num_iterations; - ticks = run_single_tbl_perf_test(handle, rte_hash_del_key, + ticks += run_single_tbl_perf_test(handle, rte_hash_del_key, params, &avg_occupancy, &invalid_pos); break; case LOOKUP: num_iterations = params->num_iterations; params->num_iterations = params->entries; - run_single_tbl_perf_test(handle, rte_hash_add_key, params, - &avg_occupancy, &invalid_pos); + ticks = run_single_tbl_perf_test(handle, rte_hash_add_key, + params, &avg_occupancy, &invalid_pos); params->num_iterations = num_iterations; - ticks = run_single_tbl_perf_test(handle, rte_hash_lookup, + ticks += run_single_tbl_perf_test(handle, rte_hash_lookup, params, &avg_occupancy, &invalid_pos); break; default: return -1; @@ -623,10 +623,15 @@ static int run_all_tbl_perf_tests(void) static void run_hash_func_test(rte_hash_function f, uint32_t init_val, uint32_t key_len) { - static uint8_t key[RTE_HASH_KEY_LENGTH_MAX]; + static uint8_t * volatile key; uint64_t ticks = 0, start, end; unsigned i, j; + key = rte_zmalloc("func hash key", + key_len * sizeof(uint8_t), 16); + if (key == NULL) + return; + for (i = 0; i < HASHTEST_ITERATIONS; i++) { for (j = 0; j < key_len; j++) @@ -638,8 +643,11 @@ static void run_hash_func_test(rte_hash_function f, uint32_t init_val, ticks += end - start; } - printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len, - (unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS); + rte_free(key); + + printf("%-12s, %-18u, %-13u, %.02f\n", + get_hash_name(f), (unsigned) key_len, (unsigned) init_val, + (double)ticks / HASHTEST_ITERATIONS); } /* @@ -687,7 +695,7 @@ fbk_hash_perf_test(void) .socket_id = rte_socket_id(), }; struct rte_fbk_hash_table *handle = NULL; - uint32_t *keys = NULL; + uint32_t * volatile keys = NULL; unsigned indexes[TEST_SIZE]; uint64_t lookup_time = 0; unsigned added = 0; diff --git a/app/test/test_memcmp.c b/app/test/test_memcmp.c new file mode 100644 index 0000000..7d9c85f --- /dev/null +++ b/app/test/test_memcmp.c @@ -0,0 +1,229 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" + +/******************************************************************************* + * Memcmp function performance test configuration section. + * Each performance test will be performed HASHTEST_ITERATIONS times. + * + * The five arrays below control what tests are performed. Every combination + * from the array entries is tested. + */ +static size_t memcmp_sizes[] = { + 1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255, + 256, 257, 320, 384, 511, 512, 513, 1023, 1024, 1025, 1518, 1522, 1600, + 2048, 3072, 4096, 5120, 6144, 7168, 8192, 16384 +}; + +/******************************************************************************/ + +#define RTE_MEMCMP_LENGTH_MAX 16384 + +/* + * Test a memcmp equal function. + */ +static int run_memcmp_eq_func_test(uint32_t len) +{ + uint32_t i, rc = 0; + uint8_t * volatile key = NULL; + + key = rte_zmalloc("memcmp key", len * sizeof(uint8_t), 16); + if (key == NULL) + return -1; + + for (i = 0; i < len; i++) + key[i] = (uint8_t) rte_rand(); + + rc = rte_memcmp(key, key, len); + rte_free(key); + + return rc; +} + +/* + * Test memcmp equal functions. + */ +static int run_memcmp_eq_func_tests(void) +{ + unsigned i; + + for (i = 0; + i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]); + i++) { + if (run_memcmp_eq_func_test(memcmp_sizes[i])) { + printf("Comparing equal %zd bytes failed\n", memcmp_sizes[i]); + return 1; + } + } + printf("RTE memcmp for equality successful\n"); + return 0; +} + +/* + * Test a memcmp less than function. + */ +static int run_memcmp_lt_func_test(uint32_t len) +{ + uint32_t i, rc; + uint8_t * volatile key_1 = NULL; + uint8_t * volatile key_2 = NULL; + + key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16); + if (key_1 == NULL) + return -1; + + key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16); + if (key_2 == NULL) + return -1; + + for (i = 0; i < len; i++) + key_1[i] = i; + + for (i = 0; i < len; i++) + key_2[i] = 2; + + rc = rte_memcmp(key_1, key_2, len); + rte_free(key_1); + rte_free(key_2); + + return rc; +} + +/* + * Test memcmp less than functions. + */ +static int run_memcmp_lt_func_tests(void) +{ + unsigned i; + + for (i = 0; + i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]); + i++) { + if (!(run_memcmp_lt_func_test(memcmp_sizes[i]) < 0)) { + printf("Comparing less than for %zd bytes failed\n", memcmp_sizes[i]); + return 1; + } + } + printf("RTE memcmp for less than successful\n"); + return 0; +} + +/* + * Test a memcmp greater than function. 
+ */ +static int run_memcmp_gt_func_test(uint32_t len) +{ + uint32_t i, rc; + uint8_t * volatile key_1 = NULL; + uint8_t * volatile key_2 = NULL; + + key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16); + if (key_1 == NULL) + return -1; + + key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16); + if (key_2 == NULL) + return -1; + + for (i = 0; i < len; i++) + key_1[i] = 2; + + for (i = 0; i < len; i++) + key_2[i] = i; + + rc = rte_memcmp(key_1, key_2, len); + rte_free(key_1); + rte_free(key_2); + + return rc; +} + +/* + * Test memcmp less than functions. + */ +static int run_memcmp_gt_func_tests(void) +{ + unsigned i; + + for (i = 0; + i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]); + i++) { + if (!(run_memcmp_gt_func_test(memcmp_sizes[i]) > 0)) { + printf("Comparing greater than for %zd bytes failed\n", memcmp_sizes[i]); + return 1; + } + } + printf("RTE memcmp for greater than successful\n"); + return 0; +} + +/* + * Do all unit and performance tests. + */ +static int +test_memcmp(void) +{ + if (run_memcmp_eq_func_tests()) + return -1; + + if (run_memcmp_gt_func_tests()) + return -1; + + if (run_memcmp_lt_func_tests()) + return -1; + + return 0; +} + +static struct test_command memcmp_cmd = { + .command = "memcmp_autotest", + .callback = test_memcmp, +}; +REGISTER_TEST_COMMAND(memcmp_cmd); diff --git a/app/test/test_memcmp_perf.c b/app/test/test_memcmp_perf.c new file mode 100644 index 0000000..8b7a0c4 --- /dev/null +++ b/app/test/test_memcmp_perf.c @@ -0,0 +1,339 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "test.h" + +/******************************************************************************* + * Memcmp function performance test configuration section. Each performance test + * will be performed MEMCMP_ITERATIONS times. 
+ * + * The five arrays below control what tests are performed. Every combination + * from the array entries is tested. + */ +#define MEMCMP_ITERATIONS 500 * 500 * 500 + +static size_t memcmp_sizes[] = { + 2, 5, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, + 129, 191, 192, 193, 255, 256, 257, 319, 320, 321, 383, 384, 385, 447, 448, + 449, 511, 512, 513, 767, 768, 769, 1023, 1024, 1025, 1522, 1536, 1600, + 2048, 2560, 3072, 3584, 4096, 4608, 5632, 6144, 6656, 7168, 7680, 8192, + 16834 +}; + +static size_t memcmp_lt_gt_sizes[] = { + 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}; + +/******************************************************************************/ + +static int +run_single_memcmp_eq_perf_test(uint32_t len, int func_type, uint64_t iterations) +{ + double begin = 0, end = 0; + uint64_t i, j, rc = 0; + uint8_t * volatile key = NULL; + + key = rte_zmalloc("memcmp key", len * sizeof(uint8_t), 16); + if (key == NULL) + return -1; + + /* Prepare inputs for the current iteration */ + for (j = 0; j < len; j++) + key[j] = j / 64; + + begin = rte_rdtsc(); + + /* Perform operation, and measure time it takes */ + for (i = 0; i < iterations; i++) { + + if (func_type == 1) + rc += rte_memcmp(key, key, len); + else + rc += memcmp(key, key, len); + } + + end = rte_rdtsc() - begin; + + printf(" *** %10i, %10.4f ***\n", len, (double)(end/iterations)); + + rte_free(key); + + return rc; +} + +/* + * Run all memcmp table performance tests. + */ +static int run_all_memcmp_eq_perf_tests(void) +{ + unsigned i; + + printf(" *** RTE memcmp equal performance test results ***\n"); + printf(" *** Length (bytes), Ticks/Op. ***\n"); + + /* Loop through every combination of test parameters */ + for (i = 0; + i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]); + i++) { + /* Perform test */ + if (run_single_memcmp_eq_perf_test(memcmp_sizes[i], 1, + MEMCMP_ITERATIONS) != 0) + return -1; + } + + printf(" *** memcmp equal performance test results ***\n"); + printf(" *** Length (bytes), Ticks/Op. ***\n"); + + /* Loop through every combination of test parameters */ + for (i = 0; + i < sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]); + i++) { + /* Perform test */ + if (run_single_memcmp_eq_perf_test(memcmp_sizes[i], 2, + MEMCMP_ITERATIONS) != 0) + return -1; + } + return 0; +} + +static int +run_single_memcmp_lt_perf_test(uint32_t len, int func_type, + uint64_t iterations) +{ + double begin = 0, end = 0; + uint64_t i, j; + uint8_t * volatile key_1 = NULL; + uint8_t * volatile key_2 = NULL; + + key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16); + if (key_1 == NULL) + return -1; + + key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16); + if (key_2 == NULL) { + rte_free(key_1); + return -1; + } + + /* Prepare inputs for the current iteration */ + for (j = 0; j < len; j++) + key_1[j] = 1; + + for (j = 0; j < len; j++) + key_2[j] = 1; + + key_2[len / 2] = 2; + + begin = rte_rdtsc(); + + /* Perform operation, and measure time it takes */ + for (i = 0; i < iterations; i++) { + + if (func_type == 1) { + if (!(rte_memcmp(key_1, key_2, len) < 0)) + return -1; + } else { + if (!(memcmp(key_1, key_2, len) < 0)) + return -1; + } + } + + end = rte_rdtsc() - begin; + + printf(" *** %10i, %10.4f ***\n", len, (double)(end/iterations)); + + rte_free(key_1); + rte_free(key_2); + + return 0; +} + +/* + * Run all memcmp table performance tests. 
+ */ +static int run_all_memcmp_lt_perf_tests(void) +{ + unsigned i; + + printf(" *** RTE memcmp less than performance test results ***\n"); + printf(" *** Length (bytes), Ticks/Op. ***\n"); + + /* Loop through every combination of test parameters */ + for (i = 0; + i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]); + i++) { + /* Perform test */ + if (run_single_memcmp_lt_perf_test(memcmp_lt_gt_sizes[i], 1, + MEMCMP_ITERATIONS) != 0) + return -1; + } + + printf(" *** memcmp less than performance test results ***\n"); + printf(" *** Length (bytes), Ticks/Op. ***\n"); + + /* Loop through every combination of test parameters */ + for (i = 0; + i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]); + i++) { + /* Perform test */ + if (run_single_memcmp_lt_perf_test(memcmp_lt_gt_sizes[i], 2, + MEMCMP_ITERATIONS) != 0) + return -1; + } + return 0; +} + +static int +run_single_memcmp_gt_perf_test(uint32_t len, int func_type, + uint64_t iterations) +{ + double begin = 0, end = 0; + uint64_t i, j; + uint8_t * volatile key_1 = NULL; + uint8_t * volatile key_2 = NULL; + + key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16); + if (key_1 == NULL) + return -1; + + key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16); + if (key_2 == NULL) { + rte_free(key_1); + return -1; + } + + /* Prepare inputs for the current iteration */ + for (j = 0; j < len; j++) + key_1[j] = 1; + key_1[len / 2] = 2; + + for (j = 0; j < len; j++) + key_2[j] = 1; + + begin = rte_rdtsc(); + + /* Perform operation, and measure time it takes */ + for (i = 0; i < iterations; i++) { + + if (func_type == 1) { + if (!(rte_memcmp(key_1, key_2, len) > 0)) + return -1; + } else { + if (!(memcmp(key_1, key_2, len) > 0)) + return -1; + } + } + + end = rte_rdtsc() - begin; + + printf(" *** %10i, %10.4f ***\n", len, (double)(end/iterations)); + + rte_free(key_1); + rte_free(key_2); + + return 0; +} + +/* + * Run all memcmp table performance tests. + */ +static int run_all_memcmp_gt_perf_tests(void) +{ + unsigned i; + + printf(" *** RTE memcmp greater than performance test results ***\n"); + printf(" *** Length (bytes), Ticks/Op. ***\n"); + + /* Loop through every combination of test parameters */ + for (i = 0; + i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]); + i++) { + /* Perform test */ + if (run_single_memcmp_gt_perf_test(memcmp_lt_gt_sizes[i], 1, + MEMCMP_ITERATIONS) != 0) + return -1; + } + + printf(" *** memcmp greater than performance test results ***\n"); + printf(" *** Length (bytes), Ticks/Op. ***\n"); + + /* Loop through every combination of test parameters */ + for (i = 0; + i < sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]); + i++) { + /* Perform test */ + if (run_single_memcmp_gt_perf_test(memcmp_lt_gt_sizes[i], 2, + MEMCMP_ITERATIONS) != 0) + return -1; + } + return 0; +} + +/* + * Do all performance tests. 
+ */ +static int +test_memcmp_perf(void) +{ + if (run_all_memcmp_eq_perf_tests() != 0) + return -1; + + if (run_all_memcmp_lt_perf_tests() != 0) + return -1; + + if (run_all_memcmp_gt_perf_tests() != 0) + return -1; + + return 0; +} + +static struct test_command memcmp_perf_cmd = { + .command = "memcmp_perf_autotest", + .callback = test_memcmp_perf, +}; +REGISTER_TEST_COMMAND(memcmp_perf_cmd); diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h new file mode 100644 index 0000000..6e54f3b --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h @@ -0,0 +1,62 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCMP_PPC_64_H_ +#define _RTE_MEMCMP_PPC_64_H_ + +#include +#include +/*To include altivec.h, GCC version must >= 4.8 */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcmp.h" + +#define rte_memcmp(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcmp((dst), (src), (n)) : \ + rte_memcmp_func((dst), (src), (n)); }) + +static inline bool +rte_memcmp_func(void *dst, const void *src, size_t n) +{ + return memcmp(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCMP_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h new file mode 100644 index 0000000..085dfb2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h @@ -0,0 +1,900 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCMP_X86_64_H_ +#define _RTE_MEMCMP_X86_64_H_ + +/** + * @file + * + * Functions for SSE/AVX/AVX2 implementation of memcmp(). + */ + +#include +#include +#include +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to compare. + * @return + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_memcmp(const void *src_1, const void *src, + size_t n) __attribute__((always_inline)); + +/** + * Find the first different bit for comparison. + */ +static inline int +rte_cmpffd (uint32_t x, uint32_t y) +{ + int i; + int pos = x ^ y; + for (i = 0; i < 32; i++) + if (pos & (1<= 256) { + ret = rte_cmp256(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + src_1 = src_1 + 256; + src_2 = src_2 + 256; + n -= 256; + } + if (n >= 128) { + ret = rte_cmp128(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + src_1 = src_1 + 128; + src_2 = src_2 + 128; + n -= 128; + } + if (n >= 64) { + n -= 64; + ret = rte_cmp64(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + src_1 = src_1 + 64; + src_2 = src_2 + 64; + } + if (n > 32) { + ret = rte_cmp32(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + ret = rte_cmp32(src_1 - 32 + n, src_2 - 32 + n); + return ret; + } + if (n > 0) + ret = rte_cmp32(src_1 - 32 + n, src_2 - 32 + n); + + return ret; + } + + while (n > 512) { + ret = rte_cmp256(src_1 + 0 * 256, src_2 + 0 * 256); + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp256(src_1 + 1 * 256, src_2 + 1 * 256); + if (unlikely(ret != 0)) + return ret; + + src_1 = src_1 + 512; + src_2 = src_2 + 512; + n -= 512; + } + goto CMP_BLOCK_LESS_THAN_512; +} + +#else /* RTE_MACHINE_CPUFLAG_AVX2 */ + +/** + * Compare 32 bytes between two locations. + * Locations should not overlap. 
+ */ +static inline int +rte_cmp32(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16, + (const uint8_t *)src_2 + 0 * 16); + + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16((const uint8_t *)src_1 + 1 * 16, + (const uint8_t *)src_2 + 1 * 16); +} + +/** + * Compare 48 bytes between two locations. + * Locations should not overlap. + */ +static inline int +rte_cmp48(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16, + (const uint8_t *)src_2 + 0 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16, + (const uint8_t *)src_2 + 1 * 16); + + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16((const uint8_t *)src_1 + 2 * 16, + (const uint8_t *)src_2 + 2 * 16); +} + +/** + * Compare 64 bytes between two locations. + * Locations should not overlap. + */ +static inline int +rte_cmp64(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16, + (const uint8_t *)src_2 + 0 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16, + (const uint8_t *)src_2 + 1 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 2 * 16, + (const uint8_t *)src_2 + 2 * 16); + + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16((const uint8_t *)src_1 + 3 * 16, + (const uint8_t *)src_2 + 3 * 16); +} + +/** + * Compare 128 bytes or its multiple between two locations. + * Locations should not overlap. + */ +static inline int +rte_cmp128(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16, + (const uint8_t *)src_2 + 0 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16, + (const uint8_t *)src_2 + 1 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 2 * 16, + (const uint8_t *)src_2 + 2 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 3 * 16, + (const uint8_t *)src_2 + 3 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 4 * 16, + (const uint8_t *)src_2 + 4 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 5 * 16, + (const uint8_t *)src_2 + 5 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 6 * 16, + (const uint8_t *)src_2 + 6 * 16); + + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16((const uint8_t *)src_1 + 7 * 16, + (const uint8_t *)src_2 + 7 * 16); +} + +/** + * Compare 256 bytes between two locations. + * Locations should not overlap. 
+ */ +static inline int +rte_cmp256(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16, + (const uint8_t *)src_2 + 0 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16, + (const uint8_t *)src_2 + 1 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 2 * 16, + (const uint8_t *)src_2 + 2 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 3 * 16, + (const uint8_t *)src_2 + 3 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 4 * 16, + (const uint8_t *)src_2 + 4 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 5 * 16, + (const uint8_t *)src_2 + 5 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 6 * 16, + (const uint8_t *)src_2 + 6 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 7 * 16, + (const uint8_t *)src_2 + 7 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 8 * 16, + (const uint8_t *)src_2 + 8 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 9 * 16, + (const uint8_t *)src_2 + 9 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 10 * 16, + (const uint8_t *)src_2 + 10 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 11 * 16, + (const uint8_t *)src_2 + 11 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 12 * 16, + (const uint8_t *)src_2 + 12 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 13 * 16, + (const uint8_t *)src_2 + 13 * 16); + + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 14 * 16, + (const uint8_t *)src_2 + 14 * 16); + + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16((const uint8_t *)src_1 + 15 * 16, + (const uint8_t *)src_2 + 15 * 16); +} + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to compare. 
+ * @return + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_memcmp(const void *_src_1, const void *_src_2, size_t n) +{ + const uint8_t *src_1 = (const uint8_t *)_src_1; + const uint8_t *src_2 = (const uint8_t *)_src_2; + int ret = 0; + + if (n < 16) + return rte_memcmp_regular(src_1, src_2, n); + + if (n <= 32) { + ret = rte_cmp16(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n); + } + + if (n <= 48) { + ret = rte_cmp32(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n); + } + + if (n <= 64) { + ret = rte_cmp32(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16(src_1 + 32, src_2 + 32); + + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n); + } + + if (n <= 96) { + ret = rte_cmp64(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + ret = rte_cmp16(src_1 + 64, src_2 + 64); + if (unlikely(ret != 0)) + return ret; + + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n); + } + + if (n <= 128) + goto CMP_BLOCK_LESS_THAN_128; + + if (n <= 512) { + if (n >= 256) { + ret = rte_cmp256(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + src_1 = src_1 + 256; + src_2 = src_2 + 256; + n -= 256; + } + +CMP_BLOCK_LESS_THAN_256: + if (n >= 128) { + ret = rte_cmp128(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + src_1 = src_1 + 128; + src_2 = src_2 + 128; + n -= 128; + } + +CMP_BLOCK_LESS_THAN_128: + if (n >= 64) { + ret = rte_cmp64(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + src_1 = src_1 + 64; + src_2 = src_2 + 64; + n -= 64; + } + + if (n >= 32) { + ret = rte_cmp32(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + src_1 = src_1 + 32; + src_2 = src_2 + 32; + n -= 32; + } + if (n > 16) { + ret = rte_cmp16(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + ret = rte_cmp16(src_1 - 16 + n, src_2 - 16 + n); + return ret; + } + if (n > 0) + ret = rte_cmp16(src_1 - 16 + n, src_2 - 16 + n); + + return ret; + } + + for (; n >= 256; n -= 256) { + ret = rte_cmp256(src_1, src_2); + if (unlikely(ret != 0)) + return ret; + + src_1 = src_1 + 256; + src_2 = src_2 + 256; + } + + goto CMP_BLOCK_LESS_THAN_256; +} + +#endif /* RTE_MACHINE_CPUFLAG_AVX2 */ + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCMP_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h new file mode 100644 index 0000000..5e68036 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_memcmp.h @@ -0,0 +1,175 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCMP_H_ +#define _RTE_MEMCMP_H_ + +/** + * @file + * + * Functions for vectorised implementation of memcmp(). + */ + +/** + * Find the first different bit for comparison. + */ +static inline int +rte_cmpffd (uint32_t x, uint32_t y); + +/** + * Find the first different byte for comparison. + */ +static inline int +rte_cmpffdb (const uint8_t *x, const uint8_t *y, size_t n); + +/** + * Compare 16 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src + * Pointer to the second source of the data. + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_cmp16(const void *src_1, const void *src_2); + +/** + * Compare 32 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_cmp32(const void *src_1, const void *src_2); + +/** + * Compare 64 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src + * Pointer to the second source of the data. + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_cmp64(const void *src_1, const void *src_2); + +/** + * Compare 48 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src + * Pointer to the second source of the data. + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_cmp48(const void *src_1, const void *src_2); + +/** + * Compare 128 bytes between two locations using + * optimised instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_cmp128(const void *src_1, const void *src_2); + +/** + * Compare 256 bytes or greater between two locations using + * optimised instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. 
+ * @param src_2 + * Pointer to the second source of the data. + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static inline int +rte_cmp256(const void *src_1, const void *src_2); + +#ifdef __DOXYGEN__ + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @note This is implemented as a macro, so it's address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to copy. + * @return + * zero if src_1 equal src_2 + * -ve if src_1 less than src_2 + * +ve if src_1 greater than src_2 + */ +static int +rte_memcmp(const void *dst, const void *src, size_t n); + +#endif /* __DOXYGEN__ */ + +/* + * memcmp() function used by rte_memcmp macro + */ +static inline int +rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + +#endif /* _RTE_MEMCMP_H_ */ diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c index 9245716..075da62 100644 --- a/lib/librte_hash/rte_hash.c +++ b/lib/librte_hash/rte_hash.c @@ -42,6 +42,7 @@ #include /* for definition of RTE_CACHE_LINE_SIZE */ #include #include +#include #include #include #include @@ -299,6 +300,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, uint8_t *key_bucket; uint32_t bucket_index, i; int32_t pos; + const void * volatile key_1 = key; /* Get the hash signature and bucket index */ sig |= h->sig_msb; @@ -308,10 +310,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { - if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), - h->key_len) == 0)) { - return bucket_index * h->bucket_entries + i; + if (sig == sig_bucket[i]) { + + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, i); + + if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) + return bucket_index * h->bucket_entries + i; } } @@ -350,6 +355,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, uint8_t *key_bucket; uint32_t bucket_index, i; + const void * volatile key_1 = key; + /* Get the hash signature and bucket index */ sig = sig | h->sig_msb; bucket_index = sig & h->bucket_bitmask; @@ -358,11 +365,14 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { - if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), - h->key_len) == 0)) { - sig_bucket[i] = NULL_SIGNATURE; - return bucket_index * h->bucket_entries + i; + if (sig == sig_bucket[i]) { + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, i); + + if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) { + sig_bucket[i] = NULL_SIGNATURE; + return bucket_index * h->bucket_entries + i; + } } } @@ -392,6 +402,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, uint8_t *key_bucket; uint32_t bucket_index, i; + const void * volatile key_1 = key; + /* Get the hash signature and bucket index */ sig |= h->sig_msb; bucket_index = sig & h->bucket_bitmask; @@ -400,10 +412,13 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { - if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, 
key_bucket, i), - h->key_len) == 0)) { - return bucket_index * h->bucket_entries + i; + if (sig == sig_bucket[i]) { + + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, i); + + if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) + return bucket_index * h->bucket_entries + i; } } @@ -456,13 +471,17 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, positions[i] = -ENOENT; for (j = 0; j < h->bucket_entries; j++) { - if ((sigs[i] == sig_bucket[j]) && - likely(memcmp(keys[i], - get_key_from_bucket(h, key_bucket, j), - h->key_len) == 0)) { - positions[i] = bucket_index * - h->bucket_entries + j; - break; + if (sigs[i] == sig_bucket[j]) { + + const void * volatile key_1 = keys[i]; + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, j); + if (likely(rte_memcmp(key_1, key_2, + h->key_len) == 0)) { + positions[i] = bucket_index * + h->bucket_entries + j; + break; + } } } } -- 1.9.1
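The x86 header above composes rte_memcmp() from fixed-size helpers
(rte_cmp16/32/64/128/256) that compare blocks with SSE/AVX loads and
byte-equality masks, then locate the first differing byte to produce
memcmp()-style ordering. As a rough SSE2-only sketch of that idea (an
assumption for illustration, not the exact code in rte_memcmp.h):

	#include <stdint.h>
	#include <emmintrin.h>	/* SSE2 intrinsics */

	/* Compare 16 bytes and return <0, 0 or >0, memcmp()-style. */
	static inline int
	cmp16_sketch(const void *src_1, const void *src_2)
	{
		__m128i a = _mm_loadu_si128((const __m128i *)src_1);
		__m128i b = _mm_loadu_si128((const __m128i *)src_2);
		/* Bit i is set when byte i of the two inputs is equal. */
		uint32_t eq = (uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(a, b));

		if (eq == 0xFFFF)
			return 0;
		/* Index of the first byte that differs. */
		int idx = __builtin_ctz(~eq & 0xFFFF);
		return (int)((const uint8_t *)src_1)[idx] -
			(int)((const uint8_t *)src_2)[idx];
	}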