From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id CDCFB5A9E for ; Mon, 12 Jan 2015 20:16:39 +0100 (CET) Received: from orsmga002.jf.intel.com ([10.7.209.21]) by fmsmga102.fm.intel.com with ESMTP; 12 Jan 2015 11:16:37 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.07,745,1413270000"; d="scan'208";a="668605495" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga002.jf.intel.com with ESMTP; 12 Jan 2015 11:16:36 -0800 Received: from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com [10.237.217.46]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id t0CJGa5J008640; Mon, 12 Jan 2015 19:16:36 GMT Received: from sivswdev02.ir.intel.com (localhost [127.0.0.1]) by sivswdev02.ir.intel.com with ESMTP id t0CJGZ0t017293; Mon, 12 Jan 2015 19:16:35 GMT Received: (from kananye1@localhost) by sivswdev02.ir.intel.com with id t0CJGZJ7017289; Mon, 12 Jan 2015 19:16:35 GMT From: Konstantin Ananyev To: dev@dpdk.org Date: Mon, 12 Jan 2015 19:16:17 +0000 Message-Id: <1421090181-17150-14-git-send-email-konstantin.ananyev@intel.com> X-Mailer: git-send-email 1.7.4.1 In-Reply-To: <1421090181-17150-1-git-send-email-konstantin.ananyev@intel.com> References: <1421090181-17150-1-git-send-email-konstantin.ananyev@intel.com> Subject: [dpdk-dev] [PATCH v2 13/17] librte_acl: Remove search_sse_2 and relatives. X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 12 Jan 2015 19:16:48 -0000 Previous improvements made the scalar method the fastest one for tiny bunches of packets (< 4). That allows us to remove the dedicated vector code path for small numbers of packets (search_sse_2) and always use the scalar method in such cases. 
Signed-off-by: Konstantin Ananyev --- lib/librte_acl/acl_run_avx2.c | 2 +- lib/librte_acl/acl_run_sse.c | 3 +- lib/librte_acl/acl_run_sse.h | 110 ------------------------------------------ 3 files changed, 3 insertions(+), 112 deletions(-) diff --git a/lib/librte_acl/acl_run_avx2.c b/lib/librte_acl/acl_run_avx2.c index 0a42f72..79ebbd6 100644 --- a/lib/librte_acl/acl_run_avx2.c +++ b/lib/librte_acl/acl_run_avx2.c @@ -49,6 +49,6 @@ rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data, else if (num >= MAX_SEARCHES_SSE4) return search_sse_4(ctx, data, results, num, categories); else - return search_sse_2(ctx, data, results, num, + return rte_acl_classify_scalar(ctx, data, results, num, categories); } diff --git a/lib/librte_acl/acl_run_sse.c b/lib/librte_acl/acl_run_sse.c index 77b32b3..a5a7d36 100644 --- a/lib/librte_acl/acl_run_sse.c +++ b/lib/librte_acl/acl_run_sse.c @@ -42,5 +42,6 @@ rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data, else if (num >= MAX_SEARCHES_SSE4) return search_sse_4(ctx, data, results, num, categories); else - return search_sse_2(ctx, data, results, num, categories); + return rte_acl_classify_scalar(ctx, data, results, num, + categories); } diff --git a/lib/librte_acl/acl_run_sse.h b/lib/librte_acl/acl_run_sse.h index e33e16b..1b7870e 100644 --- a/lib/librte_acl/acl_run_sse.h +++ b/lib/librte_acl/acl_run_sse.h @@ -45,10 +45,6 @@ static const rte_xmm_t xmm_shuffle_input = { .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}, }; -static const rte_xmm_t xmm_shuffle_input64 = { - .u32 = {0x00000000, 0x04040404, 0x80808080, 0x80808080}, -}; - static const rte_xmm_t xmm_ones_16 = { .u16 = {1, 1, 1, 1, 1, 1, 1, 1}, }; @@ -62,15 +58,6 @@ static const rte_xmm_t xmm_match_mask = { }, }; -static const rte_xmm_t xmm_match_mask64 = { - .u32 = { - RTE_ACL_NODE_MATCH, - 0, - RTE_ACL_NODE_MATCH, - 0, - }, -}; - static const rte_xmm_t xmm_index_mask = { .u32 = { RTE_ACL_NODE_INDEX, @@ -80,16 +67,6 @@ static 
const rte_xmm_t xmm_index_mask = { }, }; -static const rte_xmm_t xmm_index_mask64 = { - .u32 = { - RTE_ACL_NODE_INDEX, - RTE_ACL_NODE_INDEX, - 0, - 0, - }, -}; - - /* * Resolve priority for multiple results (sse version). * This consists comparing the priority of the current traversal with the @@ -161,22 +138,6 @@ acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx, } /* - * Check for a match in 2 transitions (contained in SSE register) - */ -static inline __attribute__((always_inline)) void -acl_match_check_x2(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, - struct acl_flow_data *flows, xmm_t *indices, xmm_t match_mask) -{ - xmm_t temp; - - temp = MM_AND(match_mask, *indices); - while (!MM_TESTZ(temp, temp)) { - acl_process_matches(indices, slot, ctx, parms, flows); - temp = MM_AND(match_mask, *indices); - } -} - -/* * Check for any match in 4 transitions (contained in 2 SSE registers) */ static inline __attribute__((always_inline)) void @@ -460,74 +421,3 @@ search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data, return 0; } - -static inline __attribute__((always_inline)) xmm_t -transition2(xmm_t next_input, const uint64_t *trans, xmm_t *indices1) -{ - uint64_t t; - xmm_t addr, indices2; - - indices2 = _mm_setzero_si128(); - - addr = calc_addr_sse(xmm_index_mask.x, next_input, xmm_shuffle_input.x, - xmm_ones_16.x, *indices1, indices2); - - /* Gather 64 bit transitions and pack 2 per register. */ - - t = trans[MM_CVT32(addr)]; - - /* get slot 1 */ - addr = MM_SHUFFLE32(addr, SHUFFLE32_SLOT1); - *indices1 = MM_SET64(trans[MM_CVT32(addr)], t); - - return MM_SRL32(next_input, CHAR_BIT); -} - -/* - * Execute trie traversal with 2 traversals in parallel. 
- */ -static inline int -search_sse_2(const struct rte_acl_ctx *ctx, const uint8_t **data, - uint32_t *results, uint32_t total_packets, uint32_t categories) -{ - int n; - struct acl_flow_data flows; - uint64_t index_array[MAX_SEARCHES_SSE2]; - struct completion cmplt[MAX_SEARCHES_SSE2]; - struct parms parms[MAX_SEARCHES_SSE2]; - xmm_t input, indices; - - acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, - total_packets, categories, ctx->trans_table); - - for (n = 0; n < MAX_SEARCHES_SSE2; n++) { - cmplt[n].count = 0; - index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); - } - - indices = MM_LOADU((xmm_t *) &index_array[0]); - - /* Check for any matches. */ - acl_match_check_x2(0, ctx, parms, &flows, &indices, - xmm_match_mask64.x); - - while (flows.started > 0) { - - /* Gather 4 bytes of input data for each stream. */ - input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0)); - input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 1), 1); - - /* Process the 4 bytes of input on each stream. */ - - input = transition2(input, flows.trans, &indices); - input = transition2(input, flows.trans, &indices); - input = transition2(input, flows.trans, &indices); - input = transition2(input, flows.trans, &indices); - - /* Check for any matches. */ - acl_match_check_x2(0, ctx, parms, &flows, &indices, - xmm_match_mask64.x); - } - - return 0; -} -- 1.8.5.3