DPDK patches and discussions
 help / color / Atom feed
From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Cc: jerinj@marvell.com, ruifeng.wang@arm.com,
	vladimir.medvedkin@intel.com,
	Konstantin Ananyev <konstantin.ananyev@intel.com>
Subject: [dpdk-dev] [PATCH v4 11/14] acl: for AVX512 classify use 4B load whenever possible
Date: Tue,  6 Oct 2020 16:03:13 +0100
Message-ID: <20201006150316.5776-12-konstantin.ananyev@intel.com> (raw)
In-Reply-To: <20201006150316.5776-1-konstantin.ananyev@intel.com>

With current ACL implementation first field in the rule definition
has always to be one byte long. Though for optimising classify
implementation it might be useful to do 4B reads
(as we do for rest of the fields).
So at build phase, check user provided field definitions to determine
is it safe to do 4B loads for first ACL field.
Then at run-time this information can be used to choose classify
behavior.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/librte_acl/acl.h               |  1 +
 lib/librte_acl/acl_bld.c           | 34 ++++++++++++++++++++++++++++++
 lib/librte_acl/acl_run_avx512.c    |  2 ++
 lib/librte_acl/acl_run_avx512x16.h |  8 +++----
 lib/librte_acl/acl_run_avx512x8.h  |  8 +++----
 lib/librte_acl/rte_acl.c           |  1 +
 6 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index 7ac0d12f08..4089ab2a04 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -169,6 +169,7 @@ struct rte_acl_ctx {
 	int32_t             socket_id;
 	/** Socket ID to allocate memory from. */
 	enum rte_acl_classify_alg alg;
+	uint32_t           first_load_sz;
 	void               *rules;
 	uint32_t            max_rules;
 	uint32_t            rule_sz;
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index d1f920b09c..da10864cd8 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -1581,6 +1581,37 @@ acl_check_bld_param(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
 	return 0;
 }
 
+/*
+ * With current ACL implementation first field in the rule definition
+ * has always to be one byte long. Though for optimising *classify*
+ * implementation it might be useful to be able to use 4B reads
+ * (as we do for rest of the fields).
+ * This function checks input config to determine is it safe to do 4B
+ * loads for first ACL field. For that we need to make sure that
+ * first field in our rule definition doesn't have the biggest offset,
+ * i.e. we still do have other fields located after the first one.
+ * Contrary if first field has the largest offset, then it means
+ * first field can occupy the very last byte in the input data buffer,
+ * and we have to do single byte load for it.
+ */
+static uint32_t
+get_first_load_size(const struct rte_acl_config *cfg)
+{
+	uint32_t i, max_ofs, ofs;
+
+	ofs = 0;
+	max_ofs = 0;
+
+	for (i = 0; i != cfg->num_fields; i++) {
+		if (cfg->defs[i].field_index == 0)
+			ofs = cfg->defs[i].offset;
+		else if (max_ofs < cfg->defs[i].offset)
+			max_ofs = cfg->defs[i].offset;
+	}
+
+	return (ofs < max_ofs) ? sizeof(uint32_t) : sizeof(uint8_t);
+}
+
 int
 rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
 {
@@ -1618,6 +1649,9 @@ rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
 				/* set data indexes. */
 				acl_set_data_indexes(ctx);
 
+				/* determine can we always do 4B load */
+				ctx->first_load_sz = get_first_load_size(cfg);
+
 				/* copy in build config. */
 				ctx->config = *cfg;
 			}
diff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c
index 74698fa2ea..3fd1e33c3f 100644
--- a/lib/librte_acl/acl_run_avx512.c
+++ b/lib/librte_acl/acl_run_avx512.c
@@ -11,6 +11,7 @@ struct acl_flow_avx512 {
 	uint32_t num_packets;       /* number of packets processed */
 	uint32_t total_packets;     /* max number of packets to process */
 	uint32_t root_index;        /* current root index */
+	uint32_t first_load_sz;     /* first load size for new packet */
 	const uint64_t *trans;      /* transition table */
 	const uint32_t *data_index; /* input data indexes */
 	const uint8_t **idata;      /* input data */
@@ -24,6 +25,7 @@ acl_set_flow_avx512(struct acl_flow_avx512 *flow, const struct rte_acl_ctx *ctx,
 {
 	flow->num_packets = 0;
 	flow->total_packets = total_packets;
+	flow->first_load_sz = ctx->first_load_sz;
 	flow->root_index = ctx->trie[trie].root_index;
 	flow->trans = ctx->trans_table;
 	flow->data_index = ctx->trie[trie].data_index;
diff --git a/lib/librte_acl/acl_run_avx512x16.h b/lib/librte_acl/acl_run_avx512x16.h
index 981f8d16da..a39df8f3c0 100644
--- a/lib/librte_acl/acl_run_avx512x16.h
+++ b/lib/librte_acl/acl_run_avx512x16.h
@@ -460,7 +460,7 @@ match_check_process_avx512x16x2(struct acl_flow_avx512 *flow, uint32_t fm[2],
 
 		if (n[0] != 0) {
 			inp[0] = get_next_bytes_avx512x16(flow, &pdata[0],
-				rm[0], &di[0], sizeof(uint8_t));
+				rm[0], &di[0], flow->first_load_sz);
 			first_trans16(flow, inp[0], rm[0], &tr_lo[0],
 				&tr_hi[0]);
 			rm[0] = _mm512_test_epi32_mask(tr_lo[0],
@@ -469,7 +469,7 @@ match_check_process_avx512x16x2(struct acl_flow_avx512 *flow, uint32_t fm[2],
 
 		if (n[1] != 0) {
 			inp[1] = get_next_bytes_avx512x16(flow, &pdata[2],
-				rm[1], &di[1], sizeof(uint8_t));
+				rm[1], &di[1], flow->first_load_sz);
 			first_trans16(flow, inp[1], rm[1], &tr_lo[1],
 				&tr_hi[1]);
 			rm[1] = _mm512_test_epi32_mask(tr_lo[1],
@@ -494,9 +494,9 @@ search_trie_avx512x16x2(struct acl_flow_avx512 *flow)
 	start_flow16(flow, MASK16_BIT, UINT16_MAX, &pdata[2], &idx[1], &di[1]);
 
 	in[0] = get_next_bytes_avx512x16(flow, &pdata[0], UINT16_MAX, &di[0],
-			sizeof(uint8_t));
+			flow->first_load_sz);
 	in[1] = get_next_bytes_avx512x16(flow, &pdata[2], UINT16_MAX, &di[1],
-			sizeof(uint8_t));
+			flow->first_load_sz);
 
 	first_trans16(flow, in[0], UINT16_MAX, &tr_lo[0], &tr_hi[0]);
 	first_trans16(flow, in[1], UINT16_MAX, &tr_lo[1], &tr_hi[1]);
diff --git a/lib/librte_acl/acl_run_avx512x8.h b/lib/librte_acl/acl_run_avx512x8.h
index cfba0299ed..fedd79b9ae 100644
--- a/lib/librte_acl/acl_run_avx512x8.h
+++ b/lib/librte_acl/acl_run_avx512x8.h
@@ -418,7 +418,7 @@ match_check_process_avx512x8x2(struct acl_flow_avx512 *flow, uint32_t fm[2],
 
 		if (n[0] != 0) {
 			inp[0] = get_next_bytes_avx512x8(flow, &pdata[0],
-				rm[0], &di[0], sizeof(uint8_t));
+				rm[0], &di[0], flow->first_load_sz);
 			first_trans8(flow, inp[0], rm[0], &tr_lo[0],
 				&tr_hi[0]);
 			rm[0] = _mm256_test_epi32_mask(tr_lo[0],
@@ -427,7 +427,7 @@ match_check_process_avx512x8x2(struct acl_flow_avx512 *flow, uint32_t fm[2],
 
 		if (n[1] != 0) {
 			inp[1] = get_next_bytes_avx512x8(flow, &pdata[2],
-				rm[1], &di[1], sizeof(uint8_t));
+				rm[1], &di[1], flow->first_load_sz);
 			first_trans8(flow, inp[1], rm[1], &tr_lo[1],
 				&tr_hi[1]);
 			rm[1] = _mm256_test_epi32_mask(tr_lo[1],
@@ -452,9 +452,9 @@ search_trie_avx512x8x2(struct acl_flow_avx512 *flow)
 	start_flow8(flow, MASK8_BIT, UINT8_MAX, &pdata[2], &idx[1], &di[1]);
 
 	in[0] = get_next_bytes_avx512x8(flow, &pdata[0], UINT8_MAX, &di[0],
-			sizeof(uint8_t));
+			flow->first_load_sz);
 	in[1] = get_next_bytes_avx512x8(flow, &pdata[2], UINT8_MAX, &di[1],
-			sizeof(uint8_t));
+			flow->first_load_sz);
 
 	first_trans8(flow, in[0], UINT8_MAX, &tr_lo[0], &tr_hi[0]);
 	first_trans8(flow, in[1], UINT8_MAX, &tr_lo[1], &tr_hi[1]);
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 245af672ee..f1474038e5 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -500,6 +500,7 @@ rte_acl_dump(const struct rte_acl_ctx *ctx)
 	printf("acl context <%s>@%p\n", ctx->name, ctx);
 	printf("  socket_id=%"PRId32"\n", ctx->socket_id);
 	printf("  alg=%"PRId32"\n", ctx->alg);
+	printf("  first_load_sz=%"PRIu32"\n", ctx->first_load_sz);
 	printf("  max_rules=%"PRIu32"\n", ctx->max_rules);
 	printf("  rule_size=%"PRIu32"\n", ctx->rule_sz);
 	printf("  num_rules=%"PRIu32"\n", ctx->num_rules);
-- 
2.17.1


  parent reply index

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-07 16:28 [dpdk-dev] [PATCH 20.11 0/7] acl: introduce AVX512 classify method Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 1/7] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 2/7] app/acl: few small improvements Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 3/7] acl: remove of unused enum value Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 4/7] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 5/7] app/acl: add AVX512 classify support Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 6/7] acl: introduce AVX512 classify implementation Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 7/7] acl: enhance " Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 00/12] acl: introduce AVX512 classify method Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 01/12] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 02/12] doc: fix mixing classify methods in ACL guide Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 03/12] acl: remove of unused enum value Konstantin Ananyev
2020-09-27  3:27     ` Ruifeng Wang
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 04/12] acl: remove library constructor Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 05/12] app/acl: few small improvements Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 06/12] test/acl: expand classify test coverage Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 07/12] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-09-16  9:11     ` Bruce Richardson
2020-09-16  9:36       ` Medvedkin, Vladimir
2020-09-16  9:49         ` Bruce Richardson
2020-09-16 10:06           ` Ananyev, Konstantin
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 08/12] acl: introduce AVX512 classify implementation Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 09/12] acl: enhance " Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 10/12] acl: for AVX512 classify use 4B load whenever possible Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 11/12] test/acl: add AVX512 classify support Konstantin Ananyev
2020-09-15 16:50   ` [dpdk-dev] [PATCH v2 12/12] app/acl: " Konstantin Ananyev
2020-10-05 18:45   ` [dpdk-dev] [PATCH v3 00/14] acl: introduce AVX512 classify methods Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 01/14] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 02/14] doc: fix missing classify methods in ACL guide Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 03/14] acl: remove of unused enum value Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 04/14] acl: remove library constructor Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 05/14] app/acl: few small improvements Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 06/14] test/acl: expand classify test coverage Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 07/14] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 08/14] acl: introduce 256-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 09/14] acl: update default classify algorithm selection Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 10/14] acl: introduce 512-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 11/14] acl: for AVX512 classify use 4B load whenever possible Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 12/14] acl: deduplicate AVX512 code paths Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 13/14] test/acl: add AVX512 classify support Konstantin Ananyev
2020-10-05 18:45     ` [dpdk-dev] [PATCH v3 14/14] app/acl: " Konstantin Ananyev
2020-10-06 15:03     ` [dpdk-dev] [PATCH v4 00/14] acl: introduce AVX512 classify methods Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 01/14] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-10-08 13:42         ` [dpdk-dev] [dpdk-stable] " David Marchand
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 02/14] doc: fix missing classify methods in ACL guide Konstantin Ananyev
2020-10-08 13:42         ` David Marchand
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 03/14] acl: remove of unused enum value Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 04/14] acl: remove library constructor Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 05/14] app/acl: few small improvements Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 06/14] test/acl: expand classify test coverage Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 07/14] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-10-13 19:17         ` David Marchand
2020-10-13 22:26           ` Ananyev, Konstantin
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 08/14] acl: introduce 256-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 09/14] acl: update default classify algorithm selection Konstantin Ananyev
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 10/14] acl: introduce 512-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-06 15:03       ` Konstantin Ananyev [this message]
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 12/14] acl: deduplicate AVX512 code paths Konstantin Ananyev
2020-10-16 15:56         ` Ferruh Yigit
2020-10-16 16:20           ` Thomas Monjalon
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 13/14] test/acl: add AVX512 classify support Konstantin Ananyev
2020-10-14 10:26         ` David Marchand
2020-10-14 10:32           ` Ananyev, Konstantin
2020-10-14 10:35             ` David Marchand
2020-10-06 15:03       ` [dpdk-dev] [PATCH v4 14/14] app/acl: " Konstantin Ananyev
2020-10-14 12:40       ` [dpdk-dev] [PATCH v4 00/14] acl: introduce AVX512 classify methods David Marchand
2020-10-06 15:05     ` [dpdk-dev] [PATCH v3 " David Marchand
2020-10-06 16:07       ` Ananyev, Konstantin
2020-10-08 10:49         ` David Marchand
2020-10-14  9:23         ` Kinsella, Ray

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201006150316.5776-12-konstantin.ananyev@intel.com \
    --to=konstantin.ananyev@intel.com \
    --cc=dev@dpdk.org \
    --cc=jerinj@marvell.com \
    --cc=ruifeng.wang@arm.com \
    --cc=vladimir.medvedkin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

Archives are clonable:
	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev


Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox