From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga06.intel.com (mga06.intel.com [134.134.136.31]) by dpdk.org (Postfix) with ESMTP id 491F01B29A for ; Tue, 3 Oct 2017 06:35:27 +0200 (CEST) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga104.jf.intel.com with ESMTP; 02 Oct 2017 21:35:27 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.42,472,1500966000"; d="scan'208";a="1178016168" Received: from bdw-yipeng.jf.intel.com ([10.54.81.30]) by orsmga001.jf.intel.com with ESMTP; 02 Oct 2017 21:35:26 -0700 From: Yipeng Wang To: dev@dpdk.org, pablo.de.lara.guarch@intel.com Cc: thomas@monjalon.net, charlie.tai@intel.com, sameh.gobriel@intel.com, john.mcnamara@intel.com, Yipeng Wang Date: Mon, 2 Oct 2017 21:31:39 -0700 Message-Id: <1507005102-43821-5-git-send-email-yipeng1.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1507005102-43821-1-git-send-email-yipeng1.wang@intel.com> References: <1506534034-39433-1-git-send-email-yipeng1.wang@intel.com> <1507005102-43821-1-git-send-email-yipeng1.wang@intel.com> Subject: [dpdk-dev] [PATCH v5 4/7] member: add AVX for HT mode X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 03 Oct 2017 04:35:27 -0000 For key search, the signatures of all entries are compared against the signature of the key that is being looked up. Since all signatures are contiguously put in a bucket, they can be compared with vector instructions (AVX2), achieving higher lookup performance. This patch adds AVX2 implementation in a separate header file. Signed-off-by: Yipeng Wang --- lib/librte_member/rte_member_ht.c | 142 +++++++++++++++++++++++++++++-------- lib/librte_member/rte_member_x86.h | 107 ++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 31 deletions(-) create mode 100644 lib/librte_member/rte_member_x86.h diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c index 08cce02..2b37bda 100644 --- a/lib/librte_member/rte_member_ht.c +++ b/lib/librte_member/rte_member_ht.c @@ -40,6 +40,10 @@ #include "rte_member.h" #include "rte_member_ht.h" +#if defined(RTE_ARCH_X86) +#include "rte_member_x86.h" +#endif + /* Search bucket for entry with tmp_sig and update set_id */ static inline int update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig, @@ -136,6 +140,13 @@ rte_member_create_ht(struct rte_member_setsum *ss, for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; } +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) && + RTE_MEMBER_BUCKET_ENTRIES == 16) + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2; + else +#endif + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR; RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, " "the table has %u entries, %u buckets\n", @@ -193,11 +204,23 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss, *set_id = RTE_MEMBER_NO_MATCH; get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); - if (search_bucket_single(prim_bucket, tmp_sig, buckets, - set_id) || - search_bucket_single(sec_bucket, tmp_sig, - buckets, set_id)) - return 1; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single_avx(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + break; +#endif + default: + if (search_bucket_single(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + } return 0; } @@ -221,13 +244,27 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, } for (i = 0; i < num_keys; i++) { - if (search_bucket_single(prim_buckets[i], tmp_sig[i], - buckets, &set_id[i]) || - search_bucket_single(sec_buckets[i], - tmp_sig[i], buckets, &set_id[i])) - num_matches++; - else - set_id[i] = RTE_MEMBER_NO_MATCH; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_buckets[i], + tmp_sig[i], buckets, &set_id[i]) || + search_bucket_single_avx(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + num_matches++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + break; +#endif + default: + if (search_bucket_single(prim_buckets[i], tmp_sig[i], + buckets, &set_id[i]) || + search_bucket_single(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + num_matches++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + } } return num_matches; } @@ -244,12 +281,24 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); - search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches, - match_per_key, set_id); - if (num_matches < match_per_key) - search_bucket_multi(sec_bucket, tmp_sig, - buckets, &num_matches, match_per_key, set_id); - return num_matches; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, + &num_matches, match_per_key, set_id); + if (num_matches < match_per_key) + search_bucket_multi_avx(sec_bucket, tmp_sig, + buckets, &num_matches, match_per_key, set_id); + return num_matches; +#endif + default: + search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches, + match_per_key, set_id); + if (num_matches < match_per_key) + search_bucket_multi(sec_bucket, tmp_sig, + buckets, &num_matches, match_per_key, set_id); + return num_matches; + } } uint32_t @@ -275,16 +324,34 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, for (i = 0; i < num_keys; i++) { match_cnt_tmp = 0; - search_bucket_multi(prim_buckets[i], tmp_sig[i], - buckets, &match_cnt_tmp, match_per_key, - &set_ids[i*match_per_key]); - if (match_cnt_tmp < match_per_key) - search_bucket_multi(sec_buckets[i], tmp_sig[i], + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], buckets, &match_cnt_tmp, match_per_key, &set_ids[i*match_per_key]); - match_count[i] = match_cnt_tmp; - if (match_cnt_tmp != 0) - num_matches++; + if (match_cnt_tmp < match_per_key) + search_bucket_multi_avx(sec_buckets[i], + tmp_sig[i], buckets, &match_cnt_tmp, + match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_tmp; + if (match_cnt_tmp != 0) + num_matches++; + break; +#endif + default: + search_bucket_multi(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_tmp < match_per_key) + search_bucket_multi(sec_buckets[i], tmp_sig[i], + buckets, &match_cnt_tmp, match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_tmp; + if (match_cnt_tmp != 0) + num_matches++; + } } return num_matches; } @@ -315,11 +382,24 @@ try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, static inline int try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, - member_sig_t sig, member_set_t set_id) + member_sig_t sig, member_set_t set_id, + enum rte_member_sig_compare_function cmp_fn) { - if (update_entry_search(prim, sig, buckets, set_id) || - update_entry_search(sec, sig, buckets, set_id)) - return 0; + switch (cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (update_entry_search_avx(prim, sig, buckets, set_id) || + update_entry_search_avx(sec, sig, buckets, + set_id)) + return 0; + break; +#endif + default: + if (update_entry_search(prim, sig, buckets, set_id) || + update_entry_search(sec, sig, buckets, + set_id)) + return 0; + } return -1; } @@ -430,7 +510,7 @@ rte_member_add_ht(const struct rte_member_setsum *ss, */ if (ss->cache) { ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig, - set_id); + set_id, ss->sig_cmp_fn); if (ret != -1) return ret; } diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h new file mode 100644 index 0000000..d29dd3f --- /dev/null +++ b/lib/librte_member/rte_member_x86.h @@ -0,0 +1,107 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMBER_X86_H_ +#define _RTE_MEMBER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#if defined(RTE_MACHINE_CPUFLAG_AVX2) + +static inline int +update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + if (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + buckets[bucket_id].sets[hit_idx] = set_id; + return 1; + } + return 0; +} + +static inline int +search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + member_set_t *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket_id].sets[hit_idx]; + return 1; + } + hitmask &= ~(3U << ((hit_idx) << 1)); + } + return 0; +} + +static inline void +search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t match_per_key, + member_set_t *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; + if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket_id].sets[hit_idx]; + (*counter)++; + if (*counter >= match_per_key) + return; + } + hitmask &= ~(3U << ((hit_idx) << 1)); + } +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_X86_H_ */ -- 2.7.4