From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 1AF532C2F for ; Wed, 6 Sep 2017 02:00:44 +0200 (CEST) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by orsmga105.jf.intel.com with ESMTP; 05 Sep 2017 17:00:44 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.41,481,1498546800"; d="scan'208";a="897423612" Received: from bdw-yipeng.jf.intel.com ([10.54.81.30]) by FMSMGA003.fm.intel.com with ESMTP; 05 Sep 2017 17:00:44 -0700 From: Yipeng Wang To: dev@dpdk.org Cc: charlie.tai@intel.com, sameh.gobriel@intel.com, ren.wang@intel.com, yipeng1.wang@intel.com, john.mcnamara@intel.com Date: Tue, 5 Sep 2017 16:59:46 -0700 Message-Id: <1504655989-1518-5-git-send-email-yipeng1.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1504655989-1518-1-git-send-email-yipeng1.wang@intel.com> References: <1504315481-12854-1-git-send-email-yipeng1.wang@intel.com> <1504655989-1518-1-git-send-email-yipeng1.wang@intel.com> Subject: [dpdk-dev] [PATCH v3 4/7] member: add AVX for HT mode X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 06 Sep 2017 00:00:45 -0000 For key search, the signatures of all entries are compared against the signature of the key that is being looked up. Since all signatures are contguously put in a bucket, they can be compared with vector instructions (AVX2), achieving higher lookup performance. This patch adds AVX2 implementation in a separate header file. Signed-off-by: Yipeng Wang --- lib/librte_member/rte_member_ht.c | 143 ++++++++++++++++++++++++++++--------- lib/librte_member/rte_member_x86.h | 111 ++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 32 deletions(-) create mode 100644 lib/librte_member/rte_member_x86.h diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c index b2ae6d0..15e2534 100644 --- a/lib/librte_member/rte_member_ht.c +++ b/lib/librte_member/rte_member_ht.c @@ -40,6 +40,10 @@ #include "rte_member.h" #include "rte_member_ht.h" +#if defined(RTE_ARCH_X86) +#include "rte_member_x86.h" +#endif + static inline int insert_overwrite_search(uint32_t bucket, SIG_TYPE tmp_sig, @@ -135,6 +139,13 @@ rte_member_create_ht(struct rte_member_setsum *ss, for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; } +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) && + RTE_MEMBER_BUCKET_ENTRIES == 16) + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2; + else +#endif + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR; RTE_LOG(DEBUG, MEMBER, "Hash table based filter created, " @@ -174,11 +185,23 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss, *set_id = RTE_MEMBER_NO_MATCH; get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); - if (search_bucket_single(prim_bucket, tmp_sig, buckets, - set_id) || - search_bucket_single(sec_bucket, tmp_sig, - buckets, set_id)) - return 1; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single_avx(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + break; +#endif + default: + if (search_bucket_single(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + } return 0; } @@ -203,13 +226,27 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, } for (i = 0; i < num_keys; i++) { - if (search_bucket_single(prim_buckets[i], tmp_sig[i], - buckets, &set_id[i]) || - search_bucket_single(sec_buckets[i], - tmp_sig[i], buckets, &set_id[i])) - ret++; - else - set_id[i] = RTE_MEMBER_NO_MATCH; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_buckets[i], + tmp_sig[i], buckets, &set_id[i]) || + search_bucket_single_avx(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + ret++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + break; +#endif + default: + if (search_bucket_single(prim_buckets[i], tmp_sig[i], + buckets, &set_id[i]) || + search_bucket_single(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + ret++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + } } return ret; } @@ -227,12 +264,24 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); - search_bucket_multi(prim_bucket, tmp_sig, buckets, &ret, - match_per_key, set_id); - if (ret < match_per_key) - search_bucket_multi(sec_bucket, tmp_sig, - buckets, &ret, match_per_key, set_id); - return ret; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, + &ret, match_per_key, set_id); + if (ret < match_per_key) + search_bucket_multi_avx(sec_bucket, tmp_sig, + buckets, &ret, match_per_key, set_id); + return ret; +#endif + default: + search_bucket_multi(prim_bucket, tmp_sig, buckets, &ret, + match_per_key, set_id); + if (ret < match_per_key) + search_bucket_multi(sec_bucket, tmp_sig, + buckets, &ret, match_per_key, set_id); + return ret; + } } @@ -259,16 +308,34 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, for (i = 0; i < num_keys; i++) { match_cnt_t = 0; - search_bucket_multi(prim_buckets[i], tmp_sig[i], - buckets, &match_cnt_t, match_per_key, - &set_ids[i*match_per_key]); - if (match_cnt_t < match_per_key) - search_bucket_multi(sec_buckets[i], tmp_sig[i], + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], buckets, &match_cnt_t, match_per_key, &set_ids[i*match_per_key]); - match_count[i] = match_cnt_t; - if (match_cnt_t != 0) - ret++; + if (match_cnt_t < match_per_key) + search_bucket_multi_avx(sec_buckets[i], + tmp_sig[i], buckets, &match_cnt_t, + match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_t; + if (match_cnt_t != 0) + ret++; + break; +#endif + default: + search_bucket_multi(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_t, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_t < match_per_key) + search_bucket_multi(sec_buckets[i], tmp_sig[i], + buckets, &match_cnt_t, match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_t; + if (match_cnt_t != 0) + ret++; + } } return ret; } @@ -300,12 +367,24 @@ try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, static inline int try_overwrite(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, - SIG_TYPE sig, MEMBER_SET_TYPE set_id) + SIG_TYPE sig, MEMBER_SET_TYPE set_id, + enum rte_member_sig_compare_function cmp_fn) { - if (insert_overwrite_search(prim, sig, buckets, set_id) || - insert_overwrite_search(sec, sig, buckets, - set_id)) - return 0; + switch (cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (insert_overwrite_search_avx(prim, sig, buckets, set_id) || + insert_overwrite_search_avx(sec, sig, buckets, + set_id)) + return 0; + break; +#endif + default: + if (insert_overwrite_search(prim, sig, buckets, set_id) || + insert_overwrite_search(sec, sig, buckets, + set_id)) + return 0; + } return -1; } @@ -411,7 +490,7 @@ rte_member_add_ht(const struct rte_member_setsum *ss, /* if it is cache based filter, we try overwriting existing entry */ if (ss->cache) { ret = try_overwrite(buckets, prim_bucket, sec_bucket, tmp_sig, - set_id); + set_id, ss->sig_cmp_fn); if (ret != -1) return ret; } diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h new file mode 100644 index 0000000..c55f128 --- /dev/null +++ b/lib/librte_member/rte_member_x86.h @@ -0,0 +1,111 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMBER_X86_H_ +#define _RTE_MEMBER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + + +#if defined(RTE_MACHINE_CPUFLAG_AVX2) + + +static inline int +insert_overwrite_search_avx(uint32_t bucket, SIG_TYPE tmp_sig, + struct member_ht_bucket *buckets, + MEMBER_SET_TYPE set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket].sigs), + _mm256_set1_epi16(tmp_sig))); + if (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) / 2; + buckets[bucket].sets[hit_idx] = set_id; + return 1; + } + return 0; +} + + +static inline int +search_bucket_single_avx(uint32_t bucket, SIG_TYPE tmp_sig, + struct member_ht_bucket *buckets, + MEMBER_SET_TYPE *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) / 2; + if (buckets[bucket].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket].sets[hit_idx]; + return 1; + } + hitmask &= ~(3U << (hit_idx) * 2); + } + return 0; +} + +static inline void +search_bucket_multi_avx(uint32_t bucket, SIG_TYPE tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t match_per_key, + MEMBER_SET_TYPE *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) / 2; + if (buckets[bucket].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket].sets[hit_idx]; + (*counter)++; + if (*counter >= match_per_key) + return; + } + hitmask &= ~(3U << (hit_idx) * 2); + } +} +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_X86_H_ */ -- 2.7.4