From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id 5B3F66CC2 for ; Wed, 5 Oct 2016 01:24:09 +0200 (CEST) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga105.fm.intel.com with ESMTP; 04 Oct 2016 16:24:08 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.31,446,1473145200"; d="scan'208";a="1049549714" Received: from sie-lab-214-036.ir.intel.com (HELO silpixa00394365.ir.intel.com) ([10.237.214.36]) by fmsmga001.fm.intel.com with ESMTP; 04 Oct 2016 16:24:08 -0700 From: Pablo de Lara To: dev@dpdk.org Cc: bruce.richardson@intel.com, Byron Marohn , Saikrishna Edupuganti , Pablo de Lara Date: Wed, 5 Oct 2016 00:25:14 +0100 Message-Id: <1475623515-47587-4-git-send-email-pablo.de.lara.guarch@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1475623515-47587-1-git-send-email-pablo.de.lara.guarch@intel.com> References: <1475221136-213246-1-git-send-email-pablo.de.lara.guarch@intel.com> <1475623515-47587-1-git-send-email-pablo.de.lara.guarch@intel.com> Subject: [dpdk-dev] [PATCH v5 3/4] hash: add vectorized comparison X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 04 Oct 2016 23:24:10 -0000 From: Byron Marohn In lookup bulk function, the signatures of all entries are compared against the signature of the key that is being looked up. Now that all the signatures are together, they can be compared with vector instructions (SSE, AVX2), achieving higher lookup performance. Also, entries per bucket are increased to 8 when using processors with AVX2, as 256 bits can be compared at once, which is the size of 8x32-bit signatures. Signed-off-by: Byron Marohn Signed-off-by: Saikrishna Edupuganti Signed-off-by: Pablo de Lara Acked-by: Bruce Richardson Acked-by: Sameh Gobriel --- lib/librte_hash/rte_cuckoo_hash.c | 76 +++++++++++++++++++++++++++++++++++---- lib/librte_hash/rte_cuckoo_hash.h | 12 ++++++- 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index a7ee2b9..d762f36 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -284,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params) h->free_slots = r; h->hw_trans_mem_support = hw_trans_mem_support; +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; + else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; + else +#endif + h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; + /* Turn on multi-writer only with explicit flat from user and TM * support. */ @@ -940,6 +949,62 @@ lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, rte_prefetch0(*secondary_bkt); } +static inline void +compare_signatures(unsigned int *prim_hash_matches, + unsigned int *sec_hash_matches, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + hash_sig_t prim_hash, hash_sig_t sec_hash, + enum rte_hash_sig_compare_function sig_cmp_fn) +{ + unsigned int i; + + switch (sig_cmp_fn) { +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + case RTE_HASH_COMPARE_AVX2: + *prim_hash_matches |= _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)prim_bkt->sig_current), + _mm256_set1_epi32(prim_hash))); + *sec_hash_matches |= _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)sec_bkt->sig_current), + _mm256_set1_epi32(sec_hash))); + break; +#endif +#ifdef RTE_MACHINE_CPUFLAG_SSE2 + case RTE_HASH_COMPARE_SSE: + /* Compare the first 4 signatures in the bucket */ + *prim_hash_matches |= _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)prim_bkt->sig_current), + _mm_set1_epi32(prim_hash))); + *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&prim_bkt->sig_current[4]), + _mm_set1_epi32(prim_hash)))) << 4; + /* Compare the first 4 signatures in the bucket */ + *sec_hash_matches |= _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)sec_bkt->sig_current), + _mm_set1_epi32(sec_hash))); + *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&sec_bkt->sig_current[4]), + _mm_set1_epi32(sec_hash)))) << 4; + break; +#endif + default: + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + *prim_hash_matches |= + ((prim_hash == prim_bkt->sig_current[i]) << i); + *sec_hash_matches |= + ((sec_hash == sec_bkt->sig_current[i]) << i); + } + } + +} + /* * Lookup bulk stage 2: Search for match hashes in primary/secondary locations * and prefetch first key slot @@ -952,15 +1017,14 @@ lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, uint64_t *extra_hits_mask, const void *keys, const struct rte_hash *h) { - unsigned prim_hash_matches, sec_hash_matches, key_idx, i; - unsigned total_hash_matches; + unsigned int prim_hash_matches, sec_hash_matches, key_idx; + unsigned int total_hash_matches; prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - prim_hash_matches |= ((prim_hash == prim_bkt->sig_current[i]) << i); - sec_hash_matches |= ((sec_hash == sec_bkt->sig_current[i]) << i); - } + + compare_signatures(&prim_hash_matches, &sec_hash_matches, prim_bkt, + sec_bkt, prim_hash, sec_hash, h->sig_cmp_fn); key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; if (key_idx == 0) diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index 6549731..504661d 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -130,7 +130,7 @@ enum add_key_case { }; /** Number of items per bucket. */ -#define RTE_HASH_BUCKET_ENTRIES 4 +#define RTE_HASH_BUCKET_ENTRIES 8 #define NULL_SIGNATURE 0 @@ -161,6 +161,14 @@ struct rte_hash_key { char key[0]; } __attribute__((aligned(KEY_ALIGNMENT))); +/* All different signature compare functions */ +enum rte_hash_sig_compare_function { + RTE_HASH_COMPARE_SCALAR = 0, + RTE_HASH_COMPARE_SSE, + RTE_HASH_COMPARE_AVX2, + RTE_HASH_COMPARE_NUM +}; + /** Bucket structure */ struct rte_hash_bucket { hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; @@ -199,6 +207,8 @@ struct rte_hash { /**< Custom function used to compare keys. */ enum cmp_jump_table_case cmp_jump_table_idx; /**< Indicates which compare function to use. */ + enum rte_hash_sig_compare_function sig_cmp_fn; + /**< Indicates which signature compare function to use. */ uint32_t bucket_bitmask; /**< Bitmask for getting bucket index from hash signature. */ uint32_t key_entry_size; /**< Size of each key entry. */ -- 2.7.4