From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f170.google.com (mail-pd0-f170.google.com [209.85.192.170]) by dpdk.org (Postfix) with ESMTP id 60C7C9AD8 for ; Fri, 8 May 2015 23:19:53 +0200 (CEST) Received: by pdbqd1 with SMTP id qd1so97298187pdb.2 for ; Fri, 08 May 2015 14:19:52 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=5hqMcH2x/5lEVIyC9xMgfOSWMylxZ153aAsg15BZdq8=; b=ysZOOlb+PjH+/m+Eo8KRLZegg0MAt4YY5kjg4xy1Q2Y3bnlSP5DJaKj7DGmnJAKwpJ vWAJkgGgRCGHAiGfhINYHKjVqhQr/7oXRHkYqBlrvDkVNzIFVhrtaha40gxtP6pEIrpK RJPTef9NHYmA7L0SVQ+aWn1BbOMIBrDwdu/gEWOlOV1vUOQmSRHm8aix10RFaJFZqsjS NhtSuAhr3e6dNLj36a5tEBhN3uKZKL50grg8UwsX/FC6eHwI6RJEwriGM9QYO42seWX8 iSyqDrQq2NXcv+yOl8yqIXQ6/CIjLzKmrtWrvO011d4HUB0VWYwEZjbDYzdXo0xdcA5U 3mbA== X-Received: by 10.70.55.165 with SMTP id t5mr6193pdp.102.1431119992747; Fri, 08 May 2015 14:19:52 -0700 (PDT) Received: from user-PC.hsd1.ca.comcast.net (c-98-234-176-9.hsd1.ca.comcast.net. [98.234.176.9]) by mx.google.com with ESMTPSA id k3sm6107353pde.18.2015.05.08.14.19.51 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Fri, 08 May 2015 14:19:52 -0700 (PDT) From: Ravi Kerur To: dev@dpdk.org Date: Fri, 8 May 2015 14:19:49 -0700 Message-Id: <1431119989-32124-1-git-send-email-rkerur@gmail.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1431119946-32078-1-git-send-email-rkerur@gmail.com> References: <1431119946-32078-1-git-send-email-rkerur@gmail.com> Subject: [dpdk-dev] [PATCH v2] Implement memcmp using AVX/SSE instructions. X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 08 May 2015 21:19:54 -0000 This patch replaces memcmp in librte_hash with rte_memcmp which is implemented with AVX/SSE instructions. 
Preliminary results on Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, Ubuntu 14.04 x86_64 shows comparisons using AVX/SSE instructions taking 1/3rd CPU ticks for 16, 32, 48 and 64 bytes comparison. In addition, hash_perf_autotest results shows using new comparison function results in faster completion of hash operations than existing memcmp in all categories. Signed-off-by: Ravi Kerur --- app/test/test_hash_perf.c | 36 +- .../common/include/arch/ppc_64/rte_memcmp.h | 62 +++ .../common/include/arch/x86/rte_memcmp.h | 421 +++++++++++++++++++++ lib/librte_eal/common/include/generic/rte_memcmp.h | 131 +++++++ lib/librte_hash/rte_hash.c | 59 ++- 5 files changed, 675 insertions(+), 34 deletions(-) create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c index 6eabb21..6887629 100644 --- a/app/test/test_hash_perf.c +++ b/app/test/test_hash_perf.c @@ -440,7 +440,7 @@ run_single_tbl_perf_test(const struct rte_hash *h, hash_operation func, uint32_t *invalid_pos_count) { uint64_t begin, end, ticks = 0; - uint8_t *key = NULL; + uint8_t * volatile key = NULL; uint32_t *bucket_occupancies = NULL; uint32_t num_buckets, i, j; int32_t pos; @@ -547,30 +547,30 @@ run_tbl_perf_test(struct tbl_perf_test_params *params) case ADD_UPDATE: num_iterations = params->num_iterations; params->num_iterations = params->entries; - run_single_tbl_perf_test(handle, rte_hash_add_key, params, - &avg_occupancy, &invalid_pos); - params->num_iterations = num_iterations; ticks = run_single_tbl_perf_test(handle, rte_hash_add_key, params, &avg_occupancy, &invalid_pos); + params->num_iterations = num_iterations; + ticks += run_single_tbl_perf_test(handle, rte_hash_add_key, + params, &avg_occupancy, &invalid_pos); break; case DELETE: num_iterations = params->num_iterations; 
params->num_iterations = params->entries; - run_single_tbl_perf_test(handle, rte_hash_add_key, params, - &avg_occupancy, &invalid_pos); + ticks = run_single_tbl_perf_test(handle, rte_hash_add_key, + params, &avg_occupancy, &invalid_pos); params->num_iterations = num_iterations; - ticks = run_single_tbl_perf_test(handle, rte_hash_del_key, + ticks += run_single_tbl_perf_test(handle, rte_hash_del_key, params, &avg_occupancy, &invalid_pos); break; case LOOKUP: num_iterations = params->num_iterations; params->num_iterations = params->entries; - run_single_tbl_perf_test(handle, rte_hash_add_key, params, - &avg_occupancy, &invalid_pos); + ticks = run_single_tbl_perf_test(handle, rte_hash_add_key, + params, &avg_occupancy, &invalid_pos); params->num_iterations = num_iterations; - ticks = run_single_tbl_perf_test(handle, rte_hash_lookup, + ticks += run_single_tbl_perf_test(handle, rte_hash_lookup, params, &avg_occupancy, &invalid_pos); break; default: return -1; @@ -623,10 +623,15 @@ static int run_all_tbl_perf_tests(void) static void run_hash_func_test(rte_hash_function f, uint32_t init_val, uint32_t key_len) { - static uint8_t key[RTE_HASH_KEY_LENGTH_MAX]; + static uint8_t * volatile key; uint64_t ticks = 0, start, end; unsigned i, j; + key = rte_zmalloc("func hash key", + key_len * sizeof(uint8_t), 16); + if (key == NULL) + return; + for (i = 0; i < HASHTEST_ITERATIONS; i++) { for (j = 0; j < key_len; j++) @@ -638,8 +643,11 @@ static void run_hash_func_test(rte_hash_function f, uint32_t init_val, ticks += end - start; } - printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len, - (unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS); + rte_free(key); + + printf("%-12s, %-18u, %-13u, %.02f\n", + get_hash_name(f), (unsigned) key_len, (unsigned) init_val, + (double)ticks / HASHTEST_ITERATIONS); } /* @@ -687,7 +695,7 @@ fbk_hash_perf_test(void) .socket_id = rte_socket_id(), }; struct rte_fbk_hash_table *handle = NULL; - uint32_t *keys = NULL; + 
uint32_t * volatile keys = NULL; unsigned indexes[TEST_SIZE]; uint64_t lookup_time = 0; unsigned added = 0; diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h new file mode 100644 index 0000000..7f99ee1 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h @@ -0,0 +1,62 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef _RTE_MEMCMP_PPC_64_H_ +#define _RTE_MEMCMP_PPC_64_H_ + +#include <stdint.h> +#include <string.h> +/*To include altivec.h, GCC version must >= 4.8 */ +#include <altivec.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcmp.h" + +#define rte_memcmp(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcmp((dst), (src), (n)) : \ + rte_memcmp_func((dst), (src), (n)); }) + +static inline int +rte_memcmp_func(void *dst, const void *src, size_t n) +{ + return memcmp(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCMP_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h new file mode 100644 index 0000000..b2bdeec --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h @@ -0,0 +1,421 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCMP_X86_64_H_ +#define _RTE_MEMCMP_X86_64_H_ + +/** + * @file + * + * Functions for SSE/AVX/AVX2 implementation of memcmp(). + */ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @note This is implemented as a macro, so its address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to compare. + * @return + * true if equal otherwise false. + */ +static inline int +rte_memcmp(const void *src_1, const void *src_2, + size_t n) __attribute__((always_inline)); + +/** + * Compare 16 bytes between two locations. + * locations should not overlap. 
+ */ +static inline int +rte_cmp16(const void *src_1, const void *src_2) +{ + __m128i xmm0, xmm1, xmm2; + int ret = 0; + + xmm0 = _mm_lddqu_si128((const __m128i *)src_1); + xmm1 = _mm_lddqu_si128((const __m128i *)src_2); + xmm2 = _mm_xor_si128(xmm0, xmm1); + + if (unlikely(!_mm_testz_si128(xmm2, xmm2))) { + + const uint64_t mm11 = *(const uint64_t *)src_1; + const uint64_t mm12 = *((const uint64_t *)src_1 + 1); + + const uint64_t mm21 = *(const uint64_t *)src_2; + const uint64_t mm22 = *((const uint64_t *)src_2 + 1); + + if (mm11 == mm21) + (mm12 < mm22) ? (ret = -1) : (ret = 1); + else + (mm11 < mm21) ? (ret = -1) : (ret = 1); + } + + return ret; +} + +/** + * AVX2 implementation below + */ +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + +/** + * Compare 32 bytes between two locations. + * Locations should not overlap. + */ +static inline int +rte_cmp32(const void *src_1, const void *src_2) +{ + const __m128i* src1 = (const __m128i*)src_1; + const __m128i* src2 = (const __m128i*)src_2; + + __m128i mm11 = _mm_lddqu_si128(src1); + __m128i mm12 = _mm_lddqu_si128(src1 + 1); + __m128i mm21 = _mm_lddqu_si128(src2); + __m128i mm22 = _mm_lddqu_si128(src2 + 1); + + __m128i mm1 = _mm_xor_si128(mm11, mm21); + __m128i mm2 = _mm_xor_si128(mm12, mm22); + __m128i mm = _mm_or_si128(mm1, mm2); + + if (unlikely(!_mm_testz_si128(mm, mm))) { + + /* + * Find out which of the two 16-byte blocks + * are different. 
+ */ + if (_mm_testz_si128(mm1, mm1)) { + mm11 = mm12; + mm21 = mm22; + mm1 = mm2; + } + + // Produce the comparison result + __m128i mm_cmp = _mm_cmpgt_epi8(mm21, mm11); + __m128i mm_rcmp = _mm_cmpgt_epi8(mm11, mm21); + mm_cmp = _mm_xor_si128(mm1, mm_cmp); + mm_rcmp = _mm_xor_si128(mm1, mm_rcmp); + + uint32_t cmp = _mm_movemask_epi8(mm_cmp); + uint32_t rcmp = _mm_movemask_epi8(mm_rcmp); + cmp = (cmp - 1u) ^ cmp; + rcmp = (rcmp - 1u) ^ rcmp; + return (int32_t)cmp - (int32_t)rcmp; + } + + return 0; +} + +static inline int +rte_cmp64 (const void* src_1, const void* src_2) +{ + const __m256i* src1 = (const __m256i*)src_1; + const __m256i* src2 = (const __m256i*)src_2; + + __m256i mm11 = _mm256_lddqu_si256(src1); + __m256i mm12 = _mm256_lddqu_si256(src1 + 1); + __m256i mm21 = _mm256_lddqu_si256(src2); + __m256i mm22 = _mm256_lddqu_si256(src2 + 1); + + __m256i mm1 = _mm256_xor_si256(mm11, mm21); + __m256i mm2 = _mm256_xor_si256(mm12, mm22); + __m256i mm = _mm256_or_si256(mm1, mm2); + + if (unlikely(!_mm256_testz_si256(mm, mm))) { + /* + * Find out which of the two 32-byte blocks + * are different. 
+ */ + if (_mm256_testz_si256(mm1, mm1)) { + mm11 = mm12; + mm21 = mm22; + mm1 = mm2; + } + + // Produce the comparison result + __m256i mm_cmp = _mm256_cmpgt_epi8(mm21, mm11); + __m256i mm_rcmp = _mm256_cmpgt_epi8(mm11, mm21); + mm_cmp = _mm256_xor_si256(mm1, mm_cmp); + mm_rcmp = _mm256_xor_si256(mm1, mm_rcmp); + + uint32_t cmp = _mm256_movemask_epi8(mm_cmp); + uint32_t rcmp = _mm256_movemask_epi8(mm_rcmp); + cmp = (cmp - 1u) ^ cmp; + rcmp = (rcmp - 1u) ^ rcmp; + return (int32_t)cmp - (int32_t)rcmp; + } + + return 0; +} + +static inline int +rte_cmp128 (const void* src_1, const void* src_2) +{ + const __m256i* src1 = (const __m256i*)src_1; + const __m256i* src2 = (const __m256i*)src_2; + const size_t n = 2; + size_t i; + + for (i = 0; i < n; ++i, src1 += 2, src2 += 2) { + __m256i mm11 = _mm256_lddqu_si256(src1); + __m256i mm12 = _mm256_lddqu_si256(src1 + 1); + __m256i mm21 = _mm256_lddqu_si256(src2); + __m256i mm22 = _mm256_lddqu_si256(src2 + 1); + + __m256i mm1 = _mm256_xor_si256(mm11, mm21); + __m256i mm2 = _mm256_xor_si256(mm12, mm22); + __m256i mm = _mm256_or_si256(mm1, mm2); + + if (unlikely(!_mm256_testz_si256(mm, mm))) { + /* + * Find out which of the two 32-byte blocks + * are different. + */ + if (_mm256_testz_si256(mm1, mm1)) { + mm11 = mm12; + mm21 = mm22; + mm1 = mm2; + } + + // Produce the comparison result + __m256i mm_cmp = _mm256_cmpgt_epi8(mm21, mm11); + __m256i mm_rcmp = _mm256_cmpgt_epi8(mm11, mm21); + mm_cmp = _mm256_xor_si256(mm1, mm_cmp); + mm_rcmp = _mm256_xor_si256(mm1, mm_rcmp); + + uint32_t cmp = _mm256_movemask_epi8(mm_cmp); + uint32_t rcmp = _mm256_movemask_epi8(mm_rcmp); + cmp = (cmp - 1u) ^ cmp; + rcmp = (rcmp - 1u) ^ rcmp; + return (int32_t)cmp - (int32_t)rcmp; + } + } + + return 0; +} +#else /* RTE_MACHINE_CPUFLAG_AVX2 */ + +/** + * Compare 32 bytes between two locations. + * Locations should not overlap. 
+ */ +static inline int +rte_cmp32(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16, + (const uint8_t *)src_2 + 0 * 16); + + if (likely(ret == 0)) + return rte_cmp16((const uint8_t *)src_1 + 1 * 16, + (const uint8_t *)src_2 + 1 * 16); + + return ret; +} + +/** + * Compare 64 bytes between two locations. + * Locations should not overlap. + */ +static inline int +rte_cmp64(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32, + (const uint8_t *)src_2 + 0 * 32); + + if (likely(ret == 0)) + return rte_cmp32((const uint8_t *)src_1 + 1 * 32, + (const uint8_t *)src_2 + 1 * 32); + + return ret; +} + +/** + * Compare 128 bytes between two locations. + * Locations should not overlap. + */ +static inline int +rte_cmp128(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp64((const uint8_t *)src_1 + 0 * 64, + (const uint8_t *)src_2 + 0 * 64); + + if (likely(ret == 0)) + return rte_cmp64((const uint8_t *)src_1 + 1 * 64, + (const uint8_t *)src_2 + 1 * 64); + + return ret; +} + +#endif /* RTE_MACHINE_CPUFLAG_AVX2 */ + + +/** + * Compare 48 bytes between two locations. + * Locations should not overlap. 
+ */ +static inline int +rte_cmp48(const void *src_1, const void *src_2) +{ + int ret; + + ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32, + (const uint8_t *)src_2 + 0 * 32); + + if (likely(ret == 0)) + return rte_cmp16((const uint8_t *)src_1 + 1 * 32, + (const uint8_t *)src_2 + 1 * 32); + return ret; +} + +static inline int +rte_memcmp_remainder(const uint8_t *src_1u, const uint8_t *src_2u, size_t n) +{ + int ret = 1; + + /** + * Compare less than 16 bytes + */ + if (n & 0x08) { + ret = (*(const uint64_t *)src_1u == + *(const uint64_t *)src_2u); + if (likely(ret == 1)) { + n -= 0x8; + src_1u += 0x8; + src_2u += 0x8; + } else { + goto exit; + } + } + + if (n & 0x04) { + ret = (*(const uint32_t *)src_1u == + *(const uint32_t *)src_2u); + if (likely(ret == 1)) { + n -= 0x4; + src_1u += 0x4; + src_2u += 0x4; + } else { + goto exit; + } + } + + if (n & 0x02) { + ret = (*(const uint16_t *)src_1u == + *(const uint16_t *)src_2u); + + if (likely(ret == 1)) { + n -= 0x2; + src_1u += 0x2; + src_2u += 0x2; + } else { + goto exit; + } + } + + if (n & 0x01) { + ret = (*(const uint8_t *)src_1u == + *(const uint8_t *)src_2u); + if (likely(ret == 1)) { + return 0; + } else { + goto exit; + } + } + + return !ret; +exit: + + return src_1u < src_2u ? 
-1 : 1; +} + +static inline int +rte_memcmp(const void *_src_1, const void *_src_2, size_t n) +{ + const uint8_t *src_1 = (const uint8_t *)_src_1; + const uint8_t *src_2 = (const uint8_t *)_src_2; + int ret = 0; + + if (n & 0x80) + return rte_cmp128(src_1, src_2); + + if (n & 0x40) + return rte_cmp64(src_1, src_2); + + if (n & 0x20) { + ret = rte_cmp32(src_1, src_2); + n -= 0x20; + src_1 += 0x20; + src_2 += 0x20; + } + + if ((n & 0x10) && likely(ret == 0)) { + ret = rte_cmp16(src_1, src_2); + n -= 0x10; + src_1 += 0x10; + src_2 += 0x10; + } + + if (n && likely(ret == 0)) + ret = rte_memcmp_remainder(src_1, src_2, n); + + return ret; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCMP_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h new file mode 100644 index 0000000..db9626b --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_memcmp.h @@ -0,0 +1,131 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCMP_H_ +#define _RTE_MEMCMP_H_ + +/** + * @file + * + * Functions for vectorised implementation of memcmp(). + */ + +/** + * Compare 16 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline int +rte_cmp16(const void *src_1, const void *src_2); + +/** + * Compare 32 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline int +rte_cmp32(const void *src_1, const void *src_2); + +/** + * Compare 64 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline int +rte_cmp64(const void *src_1, const void *src_2); + +/** + * Compare 48 bytes between two locations using optimised + * instructions. 
The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline int +rte_cmp48(const void *src_1, const void *src_2); + +/** + * Compare 128 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline int +rte_cmp128(const void *src_1, const void *src_2); + +#ifdef __DOXYGEN__ + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @note This is implemented as a macro, so its address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to compare. + * @return + * true if match otherwise false. 
+ */ +static int +rte_memcmp(const void *dst, const void *src, size_t n); + +#endif /* __DOXYGEN__ */ + +/* + * memcmp() function used by rte_memcmp macro + */ +static inline int +rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + +#endif /* _RTE_MEMCMP_H_ */ diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c index 9245716..075da62 100644 --- a/lib/librte_hash/rte_hash.c +++ b/lib/librte_hash/rte_hash.c @@ -42,6 +42,7 @@ #include /* for definition of RTE_CACHE_LINE_SIZE */ #include #include +#include #include #include #include @@ -299,6 +300,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, uint8_t *key_bucket; uint32_t bucket_index, i; int32_t pos; + const void * volatile key_1 = key; /* Get the hash signature and bucket index */ sig |= h->sig_msb; @@ -308,10 +310,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { - if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), - h->key_len) == 0)) { - return bucket_index * h->bucket_entries + i; + if (sig == sig_bucket[i]) { + + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, i); + + if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) + return bucket_index * h->bucket_entries + i; } } @@ -350,6 +355,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, uint8_t *key_bucket; uint32_t bucket_index, i; + const void * volatile key_1 = key; + /* Get the hash signature and bucket index */ sig = sig | h->sig_msb; bucket_index = sig & h->bucket_bitmask; @@ -358,11 +365,14 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { - if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), - h->key_len) == 0)) { - sig_bucket[i] = NULL_SIGNATURE; - return bucket_index * 
h->bucket_entries + i; + if (sig == sig_bucket[i]) { + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, i); + + if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) { + sig_bucket[i] = NULL_SIGNATURE; + return bucket_index * h->bucket_entries + i; + } } } @@ -392,6 +402,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, uint8_t *key_bucket; uint32_t bucket_index, i; + const void * volatile key_1 = key; + /* Get the hash signature and bucket index */ sig |= h->sig_msb; bucket_index = sig & h->bucket_bitmask; @@ -400,10 +412,13 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { - if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), - h->key_len) == 0)) { - return bucket_index * h->bucket_entries + i; + if (sig == sig_bucket[i]) { + + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, i); + + if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) + return bucket_index * h->bucket_entries + i; } } @@ -456,13 +471,17 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, positions[i] = -ENOENT; for (j = 0; j < h->bucket_entries; j++) { - if ((sigs[i] == sig_bucket[j]) && - likely(memcmp(keys[i], - get_key_from_bucket(h, key_bucket, j), - h->key_len) == 0)) { - positions[i] = bucket_index * - h->bucket_entries + j; - break; + if (sigs[i] == sig_bucket[j]) { + + const void * volatile key_1 = keys[i]; + const void * volatile key_2 = + get_key_from_bucket(h, key_bucket, j); + if (likely(rte_memcmp(key_1, key_2, + h->key_len) == 0)) { + positions[i] = bucket_index * + h->bucket_entries + j; + break; + } } } } -- 1.9.1