DPDK patches and discussions
 help / color / mirror / Atom feed
From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 12/17] librte_acl: Remove search_sse_2 and relatives.
Date: Sun, 14 Dec 2014 18:10:54 +0000	[thread overview]
Message-ID: <1418580659-12595-13-git-send-email-konstantin.ananyev@intel.com> (raw)
In-Reply-To: <1418580659-12595-1-git-send-email-konstantin.ananyev@intel.com>

Previous improvements made the scalar method the fastest one
for a tiny number of packets (< 4).
That allows us to remove the dedicated vector code path for small numbers
of packets (search_sse_2)
and always use the scalar method in such cases.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/librte_acl/acl_run_avx2.c |   2 +-
 lib/librte_acl/acl_run_sse.c  |   3 +-
 lib/librte_acl/acl_run_sse.h  | 110 ------------------------------------------
 3 files changed, 3 insertions(+), 112 deletions(-)

diff --git a/lib/librte_acl/acl_run_avx2.c b/lib/librte_acl/acl_run_avx2.c
index 8419d5d..a717c27 100644
--- a/lib/librte_acl/acl_run_avx2.c
+++ b/lib/librte_acl/acl_run_avx2.c
@@ -53,6 +53,6 @@ rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	else if (num >= MAX_SEARCHES_SSE4)
 		return search_sse_4(ctx, data, results, num, categories);
 	else
-		return search_sse_2(ctx, data, results, num,
+		return rte_acl_classify_scalar(ctx, data, results, num,
 			categories);
 }
diff --git a/lib/librte_acl/acl_run_sse.c b/lib/librte_acl/acl_run_sse.c
index 77b32b3..a5a7d36 100644
--- a/lib/librte_acl/acl_run_sse.c
+++ b/lib/librte_acl/acl_run_sse.c
@@ -42,5 +42,6 @@ rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	else if (num >= MAX_SEARCHES_SSE4)
 		return search_sse_4(ctx, data, results, num, categories);
 	else
-		return search_sse_2(ctx, data, results, num, categories);
+		return rte_acl_classify_scalar(ctx, data, results, num,
+			categories);
 }
diff --git a/lib/librte_acl/acl_run_sse.h b/lib/librte_acl/acl_run_sse.h
index e33e16b..1b7870e 100644
--- a/lib/librte_acl/acl_run_sse.h
+++ b/lib/librte_acl/acl_run_sse.h
@@ -45,10 +45,6 @@ static const rte_xmm_t xmm_shuffle_input = {
 	.u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c},
 };
 
-static const rte_xmm_t xmm_shuffle_input64 = {
-	.u32 = {0x00000000, 0x04040404, 0x80808080, 0x80808080},
-};
-
 static const rte_xmm_t xmm_ones_16 = {
 	.u16 = {1, 1, 1, 1, 1, 1, 1, 1},
 };
@@ -62,15 +58,6 @@ static const rte_xmm_t xmm_match_mask = {
 	},
 };
 
-static const rte_xmm_t xmm_match_mask64 = {
-	.u32 = {
-		RTE_ACL_NODE_MATCH,
-		0,
-		RTE_ACL_NODE_MATCH,
-		0,
-	},
-};
-
 static const rte_xmm_t xmm_index_mask = {
 	.u32 = {
 		RTE_ACL_NODE_INDEX,
@@ -80,16 +67,6 @@ static const rte_xmm_t xmm_index_mask = {
 	},
 };
 
-static const rte_xmm_t xmm_index_mask64 = {
-	.u32 = {
-		RTE_ACL_NODE_INDEX,
-		RTE_ACL_NODE_INDEX,
-		0,
-		0,
-	},
-};
-
-
 /*
  * Resolve priority for multiple results (sse version).
  * This consists comparing the priority of the current traversal with the
@@ -161,22 +138,6 @@ acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx,
 }
 
 /*
- * Check for a match in 2 transitions (contained in SSE register)
- */
-static inline __attribute__((always_inline)) void
-acl_match_check_x2(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
-	struct acl_flow_data *flows, xmm_t *indices, xmm_t match_mask)
-{
-	xmm_t temp;
-
-	temp = MM_AND(match_mask, *indices);
-	while (!MM_TESTZ(temp, temp)) {
-		acl_process_matches(indices, slot, ctx, parms, flows);
-		temp = MM_AND(match_mask, *indices);
-	}
-}
-
-/*
  * Check for any match in 4 transitions (contained in 2 SSE registers)
  */
 static inline __attribute__((always_inline)) void
@@ -460,74 +421,3 @@ search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
 
 	return 0;
 }
-
-static inline __attribute__((always_inline)) xmm_t
-transition2(xmm_t next_input, const uint64_t *trans, xmm_t *indices1)
-{
-	uint64_t t;
-	xmm_t addr, indices2;
-
-	indices2 = _mm_setzero_si128();
-
-	addr = calc_addr_sse(xmm_index_mask.x, next_input, xmm_shuffle_input.x,
-		xmm_ones_16.x, *indices1, indices2);
-
-	/* Gather 64 bit transitions and pack 2 per register. */
-
-	t = trans[MM_CVT32(addr)];
-
-	/* get slot 1 */
-	addr = MM_SHUFFLE32(addr, SHUFFLE32_SLOT1);
-	*indices1 = MM_SET64(trans[MM_CVT32(addr)], t);
-
-	return MM_SRL32(next_input, CHAR_BIT);
-}
-
-/*
- * Execute trie traversal with 2 traversals in parallel.
- */
-static inline int
-search_sse_2(const struct rte_acl_ctx *ctx, const uint8_t **data,
-	uint32_t *results, uint32_t total_packets, uint32_t categories)
-{
-	int n;
-	struct acl_flow_data flows;
-	uint64_t index_array[MAX_SEARCHES_SSE2];
-	struct completion cmplt[MAX_SEARCHES_SSE2];
-	struct parms parms[MAX_SEARCHES_SSE2];
-	xmm_t input, indices;
-
-	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
-		total_packets, categories, ctx->trans_table);
-
-	for (n = 0; n < MAX_SEARCHES_SSE2; n++) {
-		cmplt[n].count = 0;
-		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
-	}
-
-	indices = MM_LOADU((xmm_t *) &index_array[0]);
-
-	/* Check for any matches. */
-	acl_match_check_x2(0, ctx, parms, &flows, &indices,
-		xmm_match_mask64.x);
-
-	while (flows.started > 0) {
-
-		/* Gather 4 bytes of input data for each stream. */
-		input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
-		input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 1), 1);
-
-		/* Process the 4 bytes of input on each stream. */
-
-		input = transition2(input, flows.trans, &indices);
-		input = transition2(input, flows.trans, &indices);
-		input = transition2(input, flows.trans, &indices);
-		input = transition2(input, flows.trans, &indices);
-
-		/* Check for any matches. */
-		acl_match_check_x2(0, ctx, parms, &flows, &indices,
-			xmm_match_mask64.x);
-	}
-
-	return 0;
-}
-- 
1.8.5.3

  parent reply	other threads:[~2014-12-14 18:11 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-14 18:10 [dpdk-dev] [PATCH 00/17] ACL: New AVX2 classify method and several other enhancements Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 01/17] app/test: few small fixes fot test_acl.c Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 02/17] librte_acl: make data_indexes long enough to survive idle transitions Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 03/17] librte_acl: remove build phase heuristsic with negative perfomance effect Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 04/17] librte_acl: fix a bug at build phase that can cause matches beeing overwirtten Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 05/17] librte_acl: introduce DFA nodes compression (group64) for identical entries Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 06/17] librte_acl: build/gen phase - simplify the way match nodes are allocated Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 07/17] librte_acl: make scalar RT code to be more similar to vector one Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 08/17] librte_acl: a bit of RT code deduplication Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 09/17] EAL: introduce rte_ymm and relatives in rte_common_vect.h Konstantin Ananyev
2014-12-15 15:56   ` Neil Horman
2014-12-14 18:10 ` [dpdk-dev] [PATCH 10/17] librte_acl: add AVX2 as new rte_acl_classify() method Konstantin Ananyev
2014-12-15 16:00   ` Neil Horman
2014-12-15 16:33     ` Ananyev, Konstantin
2014-12-15 20:20       ` Neil Horman
2014-12-16 16:16         ` Ananyev, Konstantin
2014-12-17 15:32           ` Neil Horman
2014-12-17 19:22             ` Ananyev, Konstantin
2014-12-17 20:27               ` Neil Horman
2014-12-18 15:01                 ` Ananyev, Konstantin
2015-01-06  9:57                   ` Ananyev, Konstantin
2015-01-06 12:40                     ` Neil Horman
2014-12-17  0:38         ` Ananyev, Konstantin
2014-12-14 18:10 ` [dpdk-dev] [PATCH 11/17] test-acl: add ability to manually select RT method Konstantin Ananyev
2014-12-14 18:10 ` Konstantin Ananyev [this message]
2014-12-14 18:10 ` [dpdk-dev] [PATCH 13/17] libter_acl: move lo/hi dwords shuffle out from calc_addr Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 14/17] libte_acl: make calc_addr a define to deduplicate the code Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 15/17] libte_acl: introduce max_size into rte_acl_config Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 16/17] libte_acl: remove unused macros Konstantin Ananyev
2014-12-14 18:10 ` [dpdk-dev] [PATCH 17/17] libte_acl: fix compilation issues with RTE_LIBRTE_ACL_STANDALONE=y Konstantin Ananyev
2014-12-16 13:51   ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1418580659-12595-13-git-send-email-konstantin.ananyev@intel.com \
    --to=konstantin.ananyev@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).