From: Ravi Kerur <rkerur@gmail.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2] Implement memcmp using AVX/SSE instructions.
Date: Fri, 8 May 2015 14:19:49 -0700 [thread overview]
Message-ID: <1431119989-32124-1-git-send-email-rkerur@gmail.com> (raw)
In-Reply-To: <1431119946-32078-1-git-send-email-rkerur@gmail.com>
This patch replaces memcmp in librte_hash with rte_memcmp which is
implemented with AVX/SSE instructions.
Preliminary results on an Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz running
Ubuntu 14.04 x86_64 show that comparisons using AVX/SSE instructions take
roughly one third of the CPU ticks of memcmp for 16, 32, 48 and 64 byte
comparisons. In addition, hash_perf_autotest shows that the new comparison
function completes hash operations faster than the existing memcmp in all
categories.
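
Callers only need to switch from memcmp() to rte_memcmp(); the return value
is memcmp-style, i.e. 0 when the two regions are byte-for-byte equal. A
minimal sketch of the call site in the hash lookup path (names taken from
the change to rte_hash.c below):

    #include <rte_memcmp.h>

    /* return the entry index when the stored key matches */
    if (sig == sig_bucket[i] &&
            rte_memcmp(key_1, key_2, h->key_len) == 0)
        return bucket_index * h->bucket_entries + i;
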
Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
app/test/test_hash_perf.c | 36 +-
.../common/include/arch/ppc_64/rte_memcmp.h | 62 +++
.../common/include/arch/x86/rte_memcmp.h | 421 +++++++++++++++++++++
lib/librte_eal/common/include/generic/rte_memcmp.h | 131 +++++++
lib/librte_hash/rte_hash.c | 59 ++-
5 files changed, 675 insertions(+), 34 deletions(-)
create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..6887629 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -440,7 +440,7 @@ run_single_tbl_perf_test(const struct rte_hash *h, hash_operation func,
uint32_t *invalid_pos_count)
{
uint64_t begin, end, ticks = 0;
- uint8_t *key = NULL;
+ uint8_t * volatile key = NULL;
uint32_t *bucket_occupancies = NULL;
uint32_t num_buckets, i, j;
int32_t pos;
@@ -547,30 +547,30 @@ run_tbl_perf_test(struct tbl_perf_test_params *params)
case ADD_UPDATE:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
- params->num_iterations = num_iterations;
ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
params, &avg_occupancy, &invalid_pos);
+ params->num_iterations = num_iterations;
+ ticks += run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
break;
case DELETE:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
+ ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
params->num_iterations = num_iterations;
- ticks = run_single_tbl_perf_test(handle, rte_hash_del_key,
+ ticks += run_single_tbl_perf_test(handle, rte_hash_del_key,
params, &avg_occupancy, &invalid_pos);
break;
case LOOKUP:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
+ ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
params->num_iterations = num_iterations;
- ticks = run_single_tbl_perf_test(handle, rte_hash_lookup,
+ ticks += run_single_tbl_perf_test(handle, rte_hash_lookup,
params, &avg_occupancy, &invalid_pos);
break;
default: return -1;
@@ -623,10 +623,15 @@ static int run_all_tbl_perf_tests(void)
static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
uint32_t key_len)
{
- static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+ static uint8_t * volatile key;
uint64_t ticks = 0, start, end;
unsigned i, j;
+ key = rte_zmalloc("func hash key",
+ key_len * sizeof(uint8_t), 16);
+ if (key == NULL)
+ return;
+
for (i = 0; i < HASHTEST_ITERATIONS; i++) {
for (j = 0; j < key_len; j++)
@@ -638,8 +643,11 @@ static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
ticks += end - start;
}
- printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
- (unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+ rte_free(key);
+
+ printf("%-12s, %-18u, %-13u, %.02f\n",
+ get_hash_name(f), (unsigned) key_len, (unsigned) init_val,
+ (double)ticks / HASHTEST_ITERATIONS);
}
/*
@@ -687,7 +695,7 @@ fbk_hash_perf_test(void)
.socket_id = rte_socket_id(),
};
struct rte_fbk_hash_table *handle = NULL;
- uint32_t *keys = NULL;
+ uint32_t * volatile keys = NULL;
unsigned indexes[TEST_SIZE];
uint64_t lookup_time = 0;
unsigned added = 0;
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..7f99ee1
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/* To include altivec.h, the GCC version must be >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
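+/*
+ * When the size is a compile-time constant, plain memcmp() is used so
+ * that the compiler can expand the comparison inline; otherwise the
+ * call goes through rte_memcmp_func(), which on ppc_64 currently also
+ * wraps memcmp().
+ */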
+#define rte_memcmp(src_1, src_2, n) \
+	({ (__builtin_constant_p(n)) ? \
+	memcmp((src_1), (src_2), (n)) : \
+	rte_memcmp_func((src_1), (src_2), (n)); })
+
+static inline int
+rte_memcmp_func(const void *src_1, const void *src_2, size_t n)
+{
+	return memcmp(src_1, src_2, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..b2bdeec
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,421 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This may be implemented as a macro on some architectures, so its
+ * address should not be taken and care is needed as parameter expressions
+ * may be evaluated multiple times.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * 0 if the first n bytes of src_1 and src_2 are identical, non-zero otherwise.
+ */
+static inline int
+rte_memcmp(const void *src_1, const void *src_2,
+ size_t n) __attribute__((always_inline));
+
+/**
+ * Compare 16 bytes between two locations.
+ * locations should not overlap.
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2)
+{
+ __m128i xmm0, xmm1, xmm2;
+ int ret = 0;
+
+ xmm0 = _mm_lddqu_si128((const __m128i *)src_1);
+ xmm1 = _mm_lddqu_si128((const __m128i *)src_2);
+ xmm2 = _mm_xor_si128(xmm0, xmm1);
+
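+ /*
+ * _mm_testz_si128(xmm2, xmm2) is non-zero only when xmm2 is all
+ * zeroes, i.e. when the two 16-byte blocks are identical.
+ */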
+ if (unlikely(!_mm_testz_si128(xmm2, xmm2))) {
+
+ const uint64_t mm11 = *(const uint64_t *)src_1;
+ const uint64_t mm12 = *((const uint64_t *)src_1 + 1);
+
+ const uint64_t mm21 = *(const uint64_t *)src_2;
+ const uint64_t mm22 = *((const uint64_t *)src_2 + 1);
+
+ if (mm11 == mm21)
+ ret = (mm12 < mm22) ? -1 : 1;
+ else
+ ret = (mm11 < mm21) ? -1 : 1;
+ }
+
+ return ret;
+}
+
+/**
+ * AVX2 implementation below
+ */
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+ const __m128i* src1 = (const __m128i*)src_1;
+ const __m128i* src2 = (const __m128i*)src_2;
+
+ __m128i mm11 = _mm_lddqu_si128(src1);
+ __m128i mm12 = _mm_lddqu_si128(src1 + 1);
+ __m128i mm21 = _mm_lddqu_si128(src2);
+ __m128i mm22 = _mm_lddqu_si128(src2 + 1);
+
+ __m128i mm1 = _mm_xor_si128(mm11, mm21);
+ __m128i mm2 = _mm_xor_si128(mm12, mm22);
+ __m128i mm = _mm_or_si128(mm1, mm2);
+
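+ /*
+ * OR the per-half differences so that a single _mm_testz_si128()
+ * can detect a mismatch anywhere in the 32 bytes.
+ */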
+ if (unlikely(!_mm_testz_si128(mm, mm))) {
+
+ /*
+ * Find out which of the two 16-byte blocks
+ * are different.
+ */
+ if (_mm_testz_si128(mm1, mm1)) {
+ mm11 = mm12;
+ mm21 = mm22;
+ mm1 = mm2;
+ }
+
+ /* Produce the comparison result */
+ __m128i mm_cmp = _mm_cmpgt_epi8(mm21, mm11);
+ __m128i mm_rcmp = _mm_cmpgt_epi8(mm11, mm21);
+ mm_cmp = _mm_xor_si128(mm1, mm_cmp);
+ mm_rcmp = _mm_xor_si128(mm1, mm_rcmp);
+
+ uint32_t cmp = _mm_movemask_epi8(mm_cmp);
+ uint32_t rcmp = _mm_movemask_epi8(mm_rcmp);
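+ /*
+ * (x - 1u) ^ x keeps the lowest set bit of x and every bit below
+ * it (all bits when x is zero).
+ */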
+ cmp = (cmp - 1u) ^ cmp;
+ rcmp = (rcmp - 1u) ^ rcmp;
+ return (int32_t)cmp - (int32_t)rcmp;
+ }
+
+ return 0;
+}
+
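+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */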
+static inline int
+rte_cmp64(const void *src_1, const void *src_2)
+{
+ const __m256i* src1 = (const __m256i*)src_1;
+ const __m256i* src2 = (const __m256i*)src_2;
+
+ __m256i mm11 = _mm256_lddqu_si256(src1);
+ __m256i mm12 = _mm256_lddqu_si256(src1 + 1);
+ __m256i mm21 = _mm256_lddqu_si256(src2);
+ __m256i mm22 = _mm256_lddqu_si256(src2 + 1);
+
+ __m256i mm1 = _mm256_xor_si256(mm11, mm21);
+ __m256i mm2 = _mm256_xor_si256(mm12, mm22);
+ __m256i mm = _mm256_or_si256(mm1, mm2);
+
+ if (unlikely(!_mm256_testz_si256(mm, mm))) {
+ /*
+ * Find out which of the two 32-byte blocks
+ * are different.
+ */
+ if (_mm256_testz_si256(mm1, mm1)) {
+ mm11 = mm12;
+ mm21 = mm22;
+ mm1 = mm2;
+ }
+
+ /* Produce the comparison result */
+ __m256i mm_cmp = _mm256_cmpgt_epi8(mm21, mm11);
+ __m256i mm_rcmp = _mm256_cmpgt_epi8(mm11, mm21);
+ mm_cmp = _mm256_xor_si256(mm1, mm_cmp);
+ mm_rcmp = _mm256_xor_si256(mm1, mm_rcmp);
+
+ uint32_t cmp = _mm256_movemask_epi8(mm_cmp);
+ uint32_t rcmp = _mm256_movemask_epi8(mm_rcmp);
+ cmp = (cmp - 1u) ^ cmp;
+ rcmp = (rcmp - 1u) ^ rcmp;
+ return (int32_t)cmp - (int32_t)rcmp;
+ }
+
+ return 0;
+}
+
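+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ */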
+static inline int
+rte_cmp128(const void *src_1, const void *src_2)
+{
+ const __m256i* src1 = (const __m256i*)src_1;
+ const __m256i* src2 = (const __m256i*)src_2;
+ const size_t n = 2;
+ size_t i;
+
+ for (i = 0; i < n; ++i, src1 += 2, src2 += 2) {
+ __m256i mm11 = _mm256_lddqu_si256(src1);
+ __m256i mm12 = _mm256_lddqu_si256(src1 + 1);
+ __m256i mm21 = _mm256_lddqu_si256(src2);
+ __m256i mm22 = _mm256_lddqu_si256(src2 + 1);
+
+ __m256i mm1 = _mm256_xor_si256(mm11, mm21);
+ __m256i mm2 = _mm256_xor_si256(mm12, mm22);
+ __m256i mm = _mm256_or_si256(mm1, mm2);
+
+ if (unlikely(!_mm256_testz_si256(mm, mm))) {
+ /*
+ * Find out which of the two 32-byte blocks
+ * are different.
+ */
+ if (_mm256_testz_si256(mm1, mm1)) {
+ mm11 = mm12;
+ mm21 = mm22;
+ mm1 = mm2;
+ }
+
+ /* Produce the comparison result */
+ __m256i mm_cmp = _mm256_cmpgt_epi8(mm21, mm11);
+ __m256i mm_rcmp = _mm256_cmpgt_epi8(mm11, mm21);
+ mm_cmp = _mm256_xor_si256(mm1, mm_cmp);
+ mm_rcmp = _mm256_xor_si256(mm1, mm_rcmp);
+
+ uint32_t cmp = _mm256_movemask_epi8(mm_cmp);
+ uint32_t rcmp = _mm256_movemask_epi8(mm_rcmp);
+ cmp = (cmp - 1u) ^ cmp;
+ rcmp = (rcmp - 1u) ^ rcmp;
+ return (int32_t)cmp - (int32_t)rcmp;
+ }
+ }
+
+ return 0;
+}
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+ int ret;
+
+ ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16,
+ (const uint8_t *)src_2 + 0 * 16);
+
+ if (likely(ret == 0))
+ return rte_cmp16((const uint8_t *)src_1 + 1 * 16,
+ (const uint8_t *)src_2 + 1 * 16);
+
+ return ret;
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2)
+{
+ int ret;
+
+ ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32,
+ (const uint8_t *)src_2 + 0 * 32);
+
+ if (likely(ret == 0))
+ return rte_cmp32((const uint8_t *)src_1 + 1 * 32,
+ (const uint8_t *)src_2 + 1 * 32);
+
+ return ret;
+}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2)
+{
+ int ret;
+
+ ret = rte_cmp64((const uint8_t *)src_1 + 0 * 64,
+ (const uint8_t *)src_2 + 0 * 64);
+
+ if (likely(ret == 0))
+ return rte_cmp64((const uint8_t *)src_1 + 1 * 64,
+ (const uint8_t *)src_2 + 1 * 64);
+
+ return ret;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2)
+{
+ int ret;
+
+ ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32,
+ (const uint8_t *)src_2 + 0 * 32);
+
+ if (likely(ret == 0))
+ return rte_cmp16((const uint8_t *)src_1 + 1 * 32,
+ (const uint8_t *)src_2 + 1 * 32);
+ return ret;
+}
+
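+/**
+ * Compare the trailing 1..15 bytes with at most one 8-, 4-, 2- and
+ * 1-byte scalar comparison each, selected by the bits of n.
+ */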
+static inline int
+rte_memcmp_remainder(const uint8_t *src_1u, const uint8_t *src_2u, size_t n)
+{
+ int ret = 1;
+
+ /**
+ * Compare less than 16 bytes
+ */
+ if (n & 0x08) {
+ ret = (*(const uint64_t *)src_1u ==
+ *(const uint64_t *)src_2u);
+ if (likely(ret == 1)) {
+ n -= 0x8;
+ src_1u += 0x8;
+ src_2u += 0x8;
+ } else {
+ goto exit;
+ }
+ }
+
+ if (n & 0x04) {
+ ret = (*(const uint32_t *)src_1u ==
+ *(const uint32_t *)src_2u);
+ if (likely(ret == 1)) {
+ n -= 0x4;
+ src_1u += 0x4;
+ src_2u += 0x4;
+ } else {
+ goto exit;
+ }
+ }
+
+ if (n & 0x02) {
+ ret = (*(const uint16_t *)src_1u ==
+ *(const uint16_t *)src_2u);
+
+ if (likely(ret == 1)) {
+ n -= 0x2;
+ src_1u += 0x2;
+ src_2u += 0x2;
+ } else {
+ goto exit;
+ }
+ }
+
+ if (n & 0x01) {
+ ret = (*(const uint8_t *)src_1u ==
+ *(const uint8_t *)src_2u);
+ if (likely(ret == 1)) {
+ return 0;
+ } else {
+ goto exit;
+ }
+ }
+
+ return !ret;
+exit:
+ /*
+ * src_1u and src_2u point at the first differing chunk, so memcmp()
+ * returns the correctly signed result for the remaining n bytes.
+ */
+ return memcmp(src_1u, src_2u, n);
+}
+
+static inline int
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+ const uint8_t *src_1 = (const uint8_t *)_src_1;
+ const uint8_t *src_2 = (const uint8_t *)_src_2;
+ int ret = 0;
+
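+ /*
+ * Dispatch on the size bits of n: the 128- and 64-byte compares
+ * return directly, while the 32-byte, 16-byte and remainder paths
+ * chain, advancing the pointers as they go.
+ */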
+ if (n & 0x80)
+ return rte_cmp128(src_1, src_2);
+
+ if (n & 0x40)
+ return rte_cmp64(src_1, src_2);
+
+ if (n & 0x20) {
+ ret = rte_cmp32(src_1, src_2);
+ n -= 0x20;
+ src_1 += 0x20;
+ src_2 += 0x20;
+ }
+
+ if ((n & 0x10) && likely(ret == 0)) {
+ ret = rte_cmp16(src_1, src_2);
+ n -= 0x10;
+ src_1 += 0x10;
+ src_2 += 0x10;
+ }
+
+ if (n && likely(ret == 0))
+ ret = rte_memcmp_remainder(src_1, src_2, n);
+
+ return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..db9626b
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,131 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
+/**
+ * Compare 16 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2);
+
+/**
+ * Compare 32 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2);
+
+/**
+ * Compare 64 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2);
+
+/**
+ * Compare 48 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2);
+
+/**
+ * Compare 128 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This may be implemented as a macro, so its address should not be
+ * taken and care is needed as parameter expressions may be evaluated
+ * multiple times.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * 0 if the first n bytes of src_1 and src_2 are identical, non-zero otherwise.
+ */
+static int
+rte_memcmp(const void *src_1, const void *src_2, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcmp() function used by rte_memcmp macro
+ */
+static inline int
+rte_memcmp_func(const void *src_1, const void *src_2, size_t n) __attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
index 9245716..075da62 100644
--- a/lib/librte_hash/rte_hash.c
+++ b/lib/librte_hash/rte_hash.c
@@ -42,6 +42,7 @@
#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
#include <rte_log.h>
#include <rte_memcpy.h>
+#include <rte_memcmp.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_memzone.h>
@@ -299,6 +300,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
int32_t pos;
+ const void * volatile key_1 = key;
/* Get the hash signature and bucket index */
sig |= h->sig_msb;
@@ -308,10 +310,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+ return bucket_index * h->bucket_entries + i;
}
}
@@ -350,6 +355,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
+ const void * volatile key_1 = key;
+
/* Get the hash signature and bucket index */
sig = sig | h->sig_msb;
bucket_index = sig & h->bucket_bitmask;
@@ -358,11 +365,14 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- sig_bucket[i] = NULL_SIGNATURE;
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) {
+ sig_bucket[i] = NULL_SIGNATURE;
+ return bucket_index * h->bucket_entries + i;
+ }
}
}
@@ -392,6 +402,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
+ const void * volatile key_1 = key;
+
/* Get the hash signature and bucket index */
sig |= h->sig_msb;
bucket_index = sig & h->bucket_bitmask;
@@ -400,10 +412,13 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+ return bucket_index * h->bucket_entries + i;
}
}
@@ -456,13 +471,17 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
positions[i] = -ENOENT;
for (j = 0; j < h->bucket_entries; j++) {
- if ((sigs[i] == sig_bucket[j]) &&
- likely(memcmp(keys[i],
- get_key_from_bucket(h, key_bucket, j),
- h->key_len) == 0)) {
- positions[i] = bucket_index *
- h->bucket_entries + j;
- break;
+ if (sigs[i] == sig_bucket[j]) {
+
+ const void * volatile key_1 = keys[i];
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, j);
+ if (likely(rte_memcmp(key_1, key_2,
+ h->key_len) == 0)) {
+ positions[i] = bucket_index *
+ h->bucket_entries + j;
+ break;
+ }
}
}
}
--
1.9.1
Thread overview: 21+ messages in thread
2015-05-08 21:19 [dpdk-dev] [PATCH v2] Implement rte_memcmp with " Ravi Kerur
2015-05-08 21:19 ` Ravi Kerur [this message]
2015-05-08 22:29 ` [dpdk-dev] [PATCH v2] Implement memcmp using " Matt Laswell
2015-05-08 22:54 ` Ravi Kerur
2015-05-08 23:25 ` Matt Laswell
2015-05-11 9:51 ` Ananyev, Konstantin
2015-05-11 17:42 ` Ravi Kerur
[not found] ` <2601191342CEEE43887BDE71AB9772582142E44A@irsmsx105.ger.corp.intel.com>
2015-05-11 19:35 ` Ananyev, Konstantin
2015-05-11 20:46 ` Ravi Kerur
2015-05-11 22:29 ` Don Provan
2015-05-13 1:16 ` Ravi Kerur
2015-05-13 9:03 ` Bruce Richardson
2015-05-13 20:08 ` Ravi Kerur
2015-05-13 12:21 ` Jay Rolette
2015-05-13 20:07 ` Ravi Kerur
[not found] ` <2601191342CEEE43887BDE71AB9772582142EBB5@irsmsx105.ger.corp.intel.com>
2015-05-13 10:12 ` Ananyev, Konstantin
2015-05-13 20:06 ` Ravi Kerur
2015-05-12 8:13 ` Linhaifeng
2015-05-13 1:18 ` Ravi Kerur
2015-05-13 7:22 ` Linhaifeng
2015-05-13 20:00 ` Ravi Kerur