DPDK patches and discussions
 help / color / mirror / Atom feed
From: Pablo de Lara <pablo.de.lara.guarch@intel.com>
To: dev@dpdk.org
Cc: bruce.richardson@intel.com, Byron Marohn <byron.marohn@intel.com>,
	Saikrishna Edupuganti <saikrishna.edupuganti@intel.com>,
	Pablo de Lara <pablo.de.lara.guarch@intel.com>
Subject: [dpdk-dev] [PATCH v5 3/4] hash: add vectorized comparison
Date: Wed,  5 Oct 2016 00:25:14 +0100	[thread overview]
Message-ID: <1475623515-47587-4-git-send-email-pablo.de.lara.guarch@intel.com> (raw)
In-Reply-To: <1475623515-47587-1-git-send-email-pablo.de.lara.guarch@intel.com>

From: Byron Marohn <byron.marohn@intel.com>

In lookup bulk function, the signatures of all entries
are compared against the signature of the key that is being looked up.
Now that all the signatures are together, they can be compared
with vector instructions (SSE, AVX2), achieving higher lookup performance.

Also, entries per bucket are increased to 8 when using processors
with AVX2, as 256 bits can be compared at once, which is the size of
8x32-bit signatures.

Signed-off-by: Byron Marohn <byron.marohn@intel.com>
Signed-off-by: Saikrishna Edupuganti <saikrishna.edupuganti@intel.com>
Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Sameh Gobriel <sameh.gobriel@intel.com>
---
 lib/librte_hash/rte_cuckoo_hash.c | 76 +++++++++++++++++++++++++++++++++++----
 lib/librte_hash/rte_cuckoo_hash.h | 12 ++++++-
 2 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index a7ee2b9..d762f36 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -284,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	h->free_slots = r;
 	h->hw_trans_mem_support = hw_trans_mem_support;
 
+#if defined(RTE_ARCH_X86)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+		h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
+	else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
+	else
+#endif
+		h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
+
 	/* Turn on multi-writer only with explicit flat from user and TM
 	 * support.
 	 */
@@ -940,6 +949,62 @@ lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash,
 	rte_prefetch0(*secondary_bkt);
 }
 
+static inline void
+compare_signatures(unsigned int *prim_hash_matches,
+			unsigned int *sec_hash_matches,
+			const struct rte_hash_bucket *prim_bkt,
+			const struct rte_hash_bucket *sec_bkt,
+			hash_sig_t prim_hash, hash_sig_t sec_hash,
+			enum rte_hash_sig_compare_function sig_cmp_fn)
+{
+	unsigned int i;
+
+	switch (sig_cmp_fn) {
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+	case RTE_HASH_COMPARE_AVX2:
+		*prim_hash_matches |= _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+				_mm256_load_si256(
+					(__m256i const *)prim_bkt->sig_current),
+				_mm256_set1_epi32(prim_hash)));
+		*sec_hash_matches |= _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+				_mm256_load_si256(
+					(__m256i const *)sec_bkt->sig_current),
+				_mm256_set1_epi32(sec_hash)));
+		break;
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
+	case RTE_HASH_COMPARE_SSE:
+		/* Compare the first 4 signatures in the bucket */
+		*prim_hash_matches |= _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+				_mm_load_si128(
+					(__m128i const *)prim_bkt->sig_current),
+				_mm_set1_epi32(prim_hash)));
+		*prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+				_mm_load_si128(
+					(__m128i const *)&prim_bkt->sig_current[4]),
+				_mm_set1_epi32(prim_hash)))) << 4;
+		/* Compare the first 4 signatures in the bucket */
+		*sec_hash_matches |= _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+				_mm_load_si128(
+					(__m128i const *)sec_bkt->sig_current),
+				_mm_set1_epi32(sec_hash)));
+		*sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+				_mm_load_si128(
+					(__m128i const *)&sec_bkt->sig_current[4]),
+				_mm_set1_epi32(sec_hash)))) << 4;
+		break;
+#endif
+	default:
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			*prim_hash_matches |=
+				((prim_hash == prim_bkt->sig_current[i]) << i);
+			*sec_hash_matches |=
+				((sec_hash == sec_bkt->sig_current[i]) << i);
+		}
+	}
+
+}
+
 /*
  * Lookup bulk stage 2:  Search for match hashes in primary/secondary locations
  * and prefetch first key slot
@@ -952,15 +1017,14 @@ lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash,
 		uint64_t *extra_hits_mask, const void *keys,
 		const struct rte_hash *h)
 {
-	unsigned prim_hash_matches, sec_hash_matches, key_idx, i;
-	unsigned total_hash_matches;
+	unsigned int prim_hash_matches, sec_hash_matches, key_idx;
+	unsigned int total_hash_matches;
 
 	prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
 	sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
-	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		prim_hash_matches |= ((prim_hash == prim_bkt->sig_current[i]) << i);
-		sec_hash_matches |= ((sec_hash == sec_bkt->sig_current[i]) << i);
-	}
+
+	compare_signatures(&prim_hash_matches, &sec_hash_matches, prim_bkt,
+				sec_bkt, prim_hash, sec_hash, h->sig_cmp_fn);
 
 	key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)];
 	if (key_idx == 0)
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 6549731..504661d 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -130,7 +130,7 @@ enum add_key_case {
 };
 
 /** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES		4
+#define RTE_HASH_BUCKET_ENTRIES		8
 
 #define NULL_SIGNATURE			0
 
@@ -161,6 +161,14 @@ struct rte_hash_key {
 	char key[0];
 } __attribute__((aligned(KEY_ALIGNMENT)));
 
+/* All different signature compare functions */
+enum rte_hash_sig_compare_function {
+	RTE_HASH_COMPARE_SCALAR = 0,
+	RTE_HASH_COMPARE_SSE,
+	RTE_HASH_COMPARE_AVX2,
+	RTE_HASH_COMPARE_NUM
+};
+
 /** Bucket structure */
 struct rte_hash_bucket {
 	hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
@@ -199,6 +207,8 @@ struct rte_hash {
 	/**< Custom function used to compare keys. */
 	enum cmp_jump_table_case cmp_jump_table_idx;
 	/**< Indicates which compare function to use. */
+	enum rte_hash_sig_compare_function sig_cmp_fn;
+	/**< Indicates which signature compare function to use. */
 	uint32_t bucket_bitmask;
 	/**< Bitmask for getting bucket index from hash signature. */
 	uint32_t key_entry_size;         /**< Size of each key entry. */
-- 
2.7.4

  parent reply	other threads:[~2016-10-04 23:24 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-26 21:34 [dpdk-dev] [PATCH 0/3] Cuckoo hash lookup enhancements Pablo de Lara
2016-08-26 21:34 ` [dpdk-dev] [PATCH 1/3] hash: reorganize bucket structure Pablo de Lara
2016-08-26 21:34 ` [dpdk-dev] [PATCH 2/3] hash: add vectorized comparison Pablo de Lara
2016-08-27  8:57   ` Thomas Monjalon
2016-09-02 17:05     ` De Lara Guarch, Pablo
2016-08-26 21:34 ` [dpdk-dev] [PATCH 3/3] hash: modify lookup bulk pipeline Pablo de Lara
2016-09-02 22:56 ` [dpdk-dev] [PATCH v2 0/4] Cuckoo hash lookup enhancements Pablo de Lara
2016-09-02 22:56   ` [dpdk-dev] [PATCH v2 1/4] hash: reorder hash structure Pablo de Lara
2016-09-02 22:56   ` [dpdk-dev] [PATCH v2 2/4] hash: reorganize bucket structure Pablo de Lara
2016-09-02 22:56   ` [dpdk-dev] [PATCH v2 3/4] hash: add vectorized comparison Pablo de Lara
2016-09-02 22:56   ` [dpdk-dev] [PATCH v2 4/4] hash: modify lookup bulk pipeline Pablo de Lara
2016-09-06 19:33   ` [dpdk-dev] [PATCH v3 0/4] Cuckoo hash lookup enhancements Pablo de Lara
2016-09-30  7:38     ` [dpdk-dev] [PATCH v4 0/4] Cuckoo hash enhancements Pablo de Lara
2016-09-30  7:38       ` [dpdk-dev] [PATCH v4 1/4] hash: reorder hash structure Pablo de Lara
2016-09-30  7:38       ` [dpdk-dev] [PATCH v4 2/4] hash: reorganize bucket structure Pablo de Lara
2016-09-30  7:38       ` [dpdk-dev] [PATCH v4 3/4] hash: add vectorized comparison Pablo de Lara
2016-09-30  7:38       ` [dpdk-dev] [PATCH v4 4/4] hash: modify lookup bulk pipeline Pablo de Lara
2016-09-30 19:53       ` [dpdk-dev] [PATCH v4 0/4] Cuckoo hash enhancements Gobriel, Sameh
2016-10-03  9:59       ` Bruce Richardson
2016-10-04  6:50         ` De Lara Guarch, Pablo
2016-10-04  7:17           ` De Lara Guarch, Pablo
2016-10-04  9:47             ` Bruce Richardson
2016-10-04 23:25       ` [dpdk-dev] [PATCH v5 " Pablo de Lara
2016-10-04 23:25         ` [dpdk-dev] [PATCH v5 1/4] hash: reorder hash structure Pablo de Lara
2016-10-04 23:25         ` [dpdk-dev] [PATCH v5 2/4] hash: reorganize bucket structure Pablo de Lara
2016-10-04 23:25         ` Pablo de Lara [this message]
2016-10-04 23:25         ` [dpdk-dev] [PATCH v5 4/4] hash: modify lookup bulk pipeline Pablo de Lara
2016-10-05 10:12         ` [dpdk-dev] [PATCH v5 0/4] Cuckoo hash enhancements Thomas Monjalon
2016-09-06 19:34   ` [dpdk-dev] [PATCH v3 0/4] Cuckoo hash lookup enhancements Pablo de Lara
2016-09-06 19:34     ` [dpdk-dev] [PATCH v3 1/4] hash: reorder hash structure Pablo de Lara
2016-09-28  9:02       ` Bruce Richardson
2016-09-29  1:33         ` De Lara Guarch, Pablo
2016-09-06 19:34     ` [dpdk-dev] [PATCH v3 2/4] hash: reorganize bucket structure Pablo de Lara
2016-09-28  9:05       ` Bruce Richardson
2016-09-29  1:40         ` De Lara Guarch, Pablo
2016-09-06 19:34     ` [dpdk-dev] [PATCH v3 3/4] hash: add vectorized comparison Pablo de Lara
2016-09-06 19:34     ` [dpdk-dev] [PATCH v3 4/4] hash: modify lookup bulk pipeline Pablo de Lara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1475623515-47587-4-git-send-email-pablo.de.lara.guarch@intel.com \
    --to=pablo.de.lara.guarch@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=byron.marohn@intel.com \
    --cc=dev@dpdk.org \
    --cc=saikrishna.edupuganti@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).