From: Tony Lu <zlu@ezchip.com>
To: 'Pablo de Lara' <pablo.de.lara.guarch@intel.com>, <dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v2] hash: fix compilation for non-x86 systems
Date: Fri, 17 Jul 2015 22:41:59 +0800 [thread overview]
Message-ID: <001501d0c09e$cf739a50$6e5acef0$@com> (raw)
In-Reply-To: <1437124678-24915-1-git-send-email-pablo.de.lara.guarch@intel.com>
>-----Original Message-----
>From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
>Sent: Friday, July 17, 2015 5:18 PM
>To: dev@dpdk.org
>Subject: [dpdk-dev] [PATCH v2] hash: fix compilation for non-x86 systems
>
>From: "Pablo de Lara" <pablo.de.lara.guarch@intel.com>
>
>Hash library uses optimized compare functions that use
>x86 intrinsics, therefore non-x86 systems could not build
>the library. In that case, the compare function is set
>to the generic memcmp.
>
>Fixes: 48a399119619 ("hash: replace with cuckoo hash implementation")
>
>Reported-by: Tony Lu <zlu@ezchip.com>
>Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
>---
>Changes in v2:
>- Renamed new file rte_cmp_fns.h to rte_cmp_x86.h
>- Removed blank line
>
> lib/librte_hash/rte_cmp_x86.h | 109
>++++++++++++++++++++++++++++++++++++++
> lib/librte_hash/rte_cuckoo_hash.c | 96 ++++-----------------------------
> 2 files changed, 120 insertions(+), 85 deletions(-)
> create mode 100644 lib/librte_hash/rte_cmp_x86.h
>
>diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h
>new file mode 100644
>index 0000000..7f79bac
>--- /dev/null
>+++ b/lib/librte_hash/rte_cmp_x86.h
>@@ -0,0 +1,109 @@
>+/*-
>+ * BSD LICENSE
>+ *
>+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
>+ * All rights reserved.
>+ *
>+ * Redistribution and use in source and binary forms, with or without
>+ * modification, are permitted provided that the following conditions
>+ * are met:
>+ *
>+ * * Redistributions of source code must retain the above copyright
>+ * notice, this list of conditions and the following disclaimer.
>+ * * Redistributions in binary form must reproduce the above copyright
>+ * notice, this list of conditions and the following disclaimer in
>+ * the documentation and/or other materials provided with the
>+ * distribution.
>+ * * Neither the name of Intel Corporation nor the names of its
>+ * contributors may be used to endorse or promote products derived
>+ * from this software without specific prior written permission.
>+ *
>+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>CONTRIBUTORS
>+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
>NOT
>+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>FITNESS FOR
>+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>COPYRIGHT
>+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>INCIDENTAL,
>+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
>NOT
>+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
>OF USE,
>+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>AND ON ANY
>+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>TORT
>+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>THE USE
>+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>DAMAGE.
>+ */
>+
>+/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
>+static int
>+rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len
>__rte_unused)
>+{
>+ const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
>+ const __m128i k2 = _mm_loadu_si128((const __m128i *) key2);
>+#ifdef RTE_MACHINE_CPUFLAG_SSE4_1
>+ const __m128i x = _mm_xor_si128(k1, k2);
>+
>+ return !_mm_test_all_zeros(x, x);
>+#else
>+ const __m128i x = _mm_cmpeq_epi32(k1, k2);
>+
>+ return (_mm_movemask_epi8(x) != 0xffff);
>+#endif
>+}
>+
>+static int
>+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k16_cmp_eq((const char *) key1 + 16,
>+ (const char *) key2 + 16, key_len);
>+}
>+
>+static int
>+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k16_cmp_eq((const char *) key1 + 16,
>+ (const char *) key2 + 16, key_len) ||
>+ rte_hash_k16_cmp_eq((const char *) key1 + 32,
>+ (const char *) key2 + 32, key_len);
>+}
>+
>+static int
>+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k32_cmp_eq((const char *) key1 + 32,
>+ (const char *) key2 + 32, key_len);
>+}
>+
>+static int
>+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k16_cmp_eq((const char *) key1 + 64,
>+ (const char *) key2 + 64, key_len);
>+}
>+
>+static int
>+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k32_cmp_eq((const char *) key1 + 64,
>+ (const char *) key2 + 64, key_len);
>+}
>+
>+static int
>+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k32_cmp_eq((const char *) key1 + 64,
>+ (const char *) key2 + 64, key_len) ||
>+ rte_hash_k16_cmp_eq((const char *) key1 + 96,
>+ (const char *) key2 + 96, key_len);
>+}
>+
>+static int
>+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
>+{
>+ return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>+ rte_hash_k64_cmp_eq((const char *) key1 + 64,
>+ (const char *) key2 + 64, key_len);
>+}
>diff --git a/lib/librte_hash/rte_cuckoo_hash.c
>b/lib/librte_hash/rte_cuckoo_hash.c
>index d9ba066..dec18ce 100644
>--- a/lib/librte_hash/rte_cuckoo_hash.c
>+++ b/lib/librte_hash/rte_cuckoo_hash.c
>@@ -59,6 +59,9 @@
> #include <rte_compat.h>
>
> #include "rte_hash.h"
>+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||
>defined(RTE_ARCH_X86_X32)
>+#include "rte_cmp_x86.h"
>+#endif
>
> TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
>
>@@ -94,14 +97,6 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)
> #define KEY_ALIGNMENT 16
>
> typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t key_len);
>-static int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>-static int rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t
>key_len);
>
> /** A hash table structure. */
> struct rte_hash {
>@@ -253,6 +248,11 @@ rte_hash_create(const struct rte_hash_parameters
>*params)
> goto err;
> }
>
>+/*
>+ * If x86 architecture is used, select appropriate compare function,
>+ * which may use x86 intrinsics, otherwise use memcmp
>+ */
>+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||
>defined(RTE_ARCH_X86_X32)
> /* Select function to compare keys */
> switch (params->key_len) {
> case 16:
>@@ -283,6 +283,9 @@ rte_hash_create(const struct rte_hash_parameters
>*params)
> /* If key is not multiple of 16, use generic memcmp */
> h->rte_hash_cmp_eq = memcmp;
> }
>+#else
>+ h->rte_hash_cmp_eq = memcmp;
>+#endif
>
> snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
> r = rte_ring_lookup(ring_name);
>@@ -1118,80 +1121,3 @@ rte_hash_iterate(const struct rte_hash *h, const
>void **key, void **data, uint32
>
> return (position - 1);
> }
>-
>-/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
>-static int
>-rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len
>__rte_unused)
>-{
>- const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
>- const __m128i k2 = _mm_loadu_si128((const __m128i *) key2);
>-#ifdef RTE_MACHINE_CPUFLAG_SSE4_1
>- const __m128i x = _mm_xor_si128(k1, k2);
>-
>- return !_mm_test_all_zeros(x, x);
>-#else
>- const __m128i x = _mm_cmpeq_epi32(k1, k2);
>-
>- return (_mm_movemask_epi8(x) != 0xffff);
>-#endif
>-}
>-
>-static int
>-rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k16_cmp_eq((const char *) key1 + 16,
>- (const char *) key2 + 16, key_len);
>-}
>-
>-static int
>-rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k16_cmp_eq((const char *) key1 + 16,
>- (const char *) key2 + 16, key_len) ||
>- rte_hash_k16_cmp_eq((const char *) key1 + 32,
>- (const char *) key2 + 32, key_len);
>-}
>-
>-static int
>-rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k32_cmp_eq((const char *) key1 + 32,
>- (const char *) key2 + 32, key_len);
>-}
>-
>-static int
>-rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k16_cmp_eq((const char *) key1 + 64,
>- (const char *) key2 + 64, key_len);
>-}
>-
>-static int
>-rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k32_cmp_eq((const char *) key1 + 64,
>- (const char *) key2 + 64, key_len);
>-}
>-
>-static int
>-rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k32_cmp_eq((const char *) key1 + 64,
>- (const char *) key2 + 64, key_len) ||
>- rte_hash_k16_cmp_eq((const char *) key1 + 96,
>- (const char *) key2 + 96, key_len);
>-}
>-
>-static int
>-rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
>-{
>- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
>- rte_hash_k64_cmp_eq((const char *) key1 + 64,
>- (const char *) key2 + 64, key_len);
>-}
>--
>2.4.2
Acked-by: Zhigang Lu <zlu@ezchip.com>
next prev parent reply other threads:[~2015-07-17 14:42 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-17 9:17 Pablo de Lara
2015-07-17 14:41 ` Tony Lu [this message]
2015-07-18 17:50 ` Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='001501d0c09e$cf739a50$6e5acef0$@com' \
--to=zlu@ezchip.com \
--cc=dev@dpdk.org \
--cc=pablo.de.lara.guarch@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).