DPDK patches and discussions
 help / color / mirror / Atom feed
From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 04/17] librte_acl: remove build phase heuristsic with negative perfomance effect.
Date: Mon, 12 Jan 2015 19:16:08 +0000	[thread overview]
Message-ID: <1421090181-17150-5-git-send-email-konstantin.ananyev@intel.com> (raw)
In-Reply-To: <1421090181-17150-1-git-send-email-konstantin.ananyev@intel.com>

Current rule-wildness based heuristsics can cause unnecessary splits of
the ruleset.
That might have negative perfomance effect:
more tries to traverse, bigger RT tables.
After removing it, on some test-cases with big rulesets (~10K)
observed ~50% speedup.
No difference for smaller rulesets.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/librte_acl/acl_bld.c | 277 +++++++++++++++++------------------------------
 1 file changed, 97 insertions(+), 180 deletions(-)

diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index c5a674a..8bf4a54 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -1539,11 +1539,9 @@ acl_calc_wildness(struct rte_acl_build_rule *head,
 	return 0;
 }
 
-static int
-acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
-	uint32_t *wild_limit)
+static void
+acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config)
 {
-	int min;
 	struct rte_acl_build_rule *rule;
 	uint32_t n, m, fields_deactivated = 0;
 	uint32_t start = 0, deactivate = 0;
@@ -1604,129 +1602,58 @@ acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
 
 		for (k = 0; k < config->num_fields; k++) {
 			if (tally[k][TALLY_DEACTIVATED] == 0) {
-				memcpy(&tally[l][0], &tally[k][0],
+				memmove(&tally[l][0], &tally[k][0],
 					TALLY_NUM * sizeof(tally[0][0]));
-				memcpy(&config->defs[l++],
+				memmove(&config->defs[l++],
 					&config->defs[k],
 					sizeof(struct rte_acl_field_def));
 			}
 		}
 		config->num_fields = l;
 	}
-
-	min = RTE_ACL_SINGLE_TRIE_SIZE;
-	if (config->num_fields == 2)
-		min *= 4;
-	else if (config->num_fields == 3)
-		min *= 3;
-	else if (config->num_fields == 4)
-		min *= 2;
-
-	if (tally[0][TALLY_0] < min)
-		return 0;
-	for (n = 0; n < config->num_fields; n++)
-		wild_limit[n] = 0;
-
-	/*
-	 * If trailing fields are 100% wild, group those together.
-	 * This allows the search length of the trie to be shortened.
-	 */
-	for (n = 1; n < config->num_fields; n++) {
-
-		double rule_percentage = (double)tally[n][TALLY_DEPTH] /
-			tally[n][0];
-
-		if (rule_percentage > RULE_PERCENTAGE) {
-			/* if it crosses an input boundary then round up */
-			while (config->defs[n - 1].input_index ==
-					config->defs[n].input_index)
-				n++;
-
-			/* set the limit for selecting rules */
-			while (n < config->num_fields)
-				wild_limit[n++] = 100;
-
-			if (wild_limit[n - 1] == 100)
-				return 1;
-		}
-	}
-
-	/* look for the most wild that's 40% or more of the rules */
-	for (n = 1; n < config->num_fields; n++) {
-		for (m = TALLY_100; m > 0; m--) {
-
-			double rule_percentage = (double)tally[n][m] /
-				tally[n][0];
-
-			if (tally[n][TALLY_DEACTIVATED] == 0 &&
-					tally[n][TALLY_0] >
-					RTE_ACL_SINGLE_TRIE_SIZE &&
-					rule_percentage > NODE_PERCENTAGE &&
-					rule_percentage < 0.80) {
-				wild_limit[n] = wild_limits[m];
-				return 1;
-			}
-		}
-	}
-	return 0;
 }
 
 static int
-order(struct rte_acl_build_rule **insert, struct rte_acl_build_rule *rule)
+rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2)
 {
 	uint32_t n;
-	struct rte_acl_build_rule *left = *insert;
-
-	if (left == NULL)
-		return 0;
 
-	for (n = 1; n < left->config->num_fields; n++) {
-		int field_index = left->config->defs[n].field_index;
+	for (n = 1; n < r1->config->num_fields; n++) {
+		int field_index = r1->config->defs[n].field_index;
 
-		if (left->wildness[field_index] != rule->wildness[field_index])
-			return (left->wildness[field_index] >=
-				rule->wildness[field_index]);
+		if (r1->wildness[field_index] != r2->wildness[field_index])
+			return (r1->wildness[field_index] -
+				r2->wildness[field_index]);
 	}
 	return 0;
 }
 
 static struct rte_acl_build_rule *
-ordered_insert_rule(struct rte_acl_build_rule *head,
-	struct rte_acl_build_rule *rule)
-{
-	struct rte_acl_build_rule **insert;
-
-	if (rule == NULL)
-		return head;
-
-	rule->next = head;
-	if (head == NULL)
-		return rule;
-
-	insert = &head;
-	while (order(insert, rule))
-		insert = &(*insert)->next;
-
-	rule->next = *insert;
-	*insert = rule;
-	return head;
-}
-
-static struct rte_acl_build_rule *
 sort_rules(struct rte_acl_build_rule *head)
 {
-	struct rte_acl_build_rule *rule, *reordered_head = NULL;
-	struct rte_acl_build_rule *last_rule = NULL;
-
-	for (rule = head; rule != NULL; rule = rule->next) {
-		reordered_head = ordered_insert_rule(reordered_head, last_rule);
-		last_rule = rule;
+	struct rte_acl_build_rule *new_head;
+	struct rte_acl_build_rule *l, *r, **p;
+
+	new_head = NULL;
+	while (head != NULL) {
+		r = head;
+		head = r->next;
+		r->next = NULL;
+		if (new_head == NULL) {
+			new_head = r;
+		} else {
+			for (p = &new_head;
+					(l = *p) != NULL &&
+					rule_cmp_wildness(l, r) >= 0;
+					p = &l->next)
+				;
+
+			r->next = *p;
+			*p = r;
+		}
 	}
 
-	if (last_rule != reordered_head)
-		reordered_head = ordered_insert_rule(reordered_head, last_rule);
-
-	return reordered_head;
+	return new_head;
 }
 
 static uint32_t
@@ -1748,21 +1675,44 @@ acl_build_index(const struct rte_acl_config *config, uint32_t *data_index)
 	return m;
 }
 
+static struct rte_acl_build_rule *
+build_one_trie(struct acl_build_context *context,
+	struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES],
+	uint32_t n)
+{
+	struct rte_acl_build_rule *last;
+	struct rte_acl_config *config;
+
+	config = rule_sets[n]->config;
+
+	acl_rule_stats(rule_sets[n], config);
+	rule_sets[n] = sort_rules(rule_sets[n]);
+
+	context->tries[n].type = RTE_ACL_FULL_TRIE;
+	context->tries[n].count = 0;
+
+	context->tries[n].num_data_indexes = acl_build_index(config,
+		context->data_indexes[n]);
+	context->tries[n].data_index = context->data_indexes[n];
+
+	context->bld_tries[n].trie = build_trie(context, rule_sets[n],
+		&last, &context->tries[n].count);
+
+	return last;
+}
+
 static int
 acl_build_tries(struct acl_build_context *context,
 	struct rte_acl_build_rule *head)
 {
 	int32_t rc;
-	uint32_t n, m, num_tries;
+	uint32_t n, num_tries;
 	struct rte_acl_config *config;
-	struct rte_acl_build_rule *last, *rule;
-	uint32_t wild_limit[RTE_ACL_MAX_LEVELS];
+	struct rte_acl_build_rule *last;
 	struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES];
 
 	config = head->config;
-	rule = head;
 	rule_sets[0] = head;
-	num_tries = 1;
 
 	/* initialize tries */
 	for (n = 0; n < RTE_DIM(context->tries); n++) {
@@ -1779,91 +1729,55 @@ acl_build_tries(struct acl_build_context *context,
 	if (rc != 0)
 		return rc;
 
-	n = acl_rule_stats(head, config, &wild_limit[0]);
-
-	/* put all rules that fit the wildness criteria into a seperate trie */
-	while (n > 0 && num_tries < RTE_ACL_MAX_TRIES) {
+	for (n = 0;; n = num_tries) {
 
-		struct rte_acl_config *new_config;
-		struct rte_acl_build_rule **prev = &rule_sets[num_tries - 1];
-		struct rte_acl_build_rule *next = head->next;
+		num_tries = n + 1;
 
-		new_config = acl_build_alloc(context, 1, sizeof(*new_config));
-		if (new_config == NULL) {
-			RTE_LOG(ERR, ACL,
-				"Failed to get space for new config\n");
+		last = build_one_trie(context, rule_sets, n);
+		if (context->bld_tries[n].trie == NULL) {
+			RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
 			return -ENOMEM;
 		}
 
-		memcpy(new_config, config, sizeof(*new_config));
-		config = new_config;
-		rule_sets[num_tries] = NULL;
-
-		for (rule = head; rule != NULL; rule = next) {
+		/* Build of the last trie completed. */
+		if (last == NULL)
+			break;
 
-			int move = 1;
+		if (num_tries == RTE_DIM(context->tries)) {
+			RTE_LOG(ERR, ACL,
+				"Exceeded max number of tries: %u\n",
+				num_tries);
+			return -ENOMEM;
+		}
 
-			next = rule->next;
-			for (m = 0; m < config->num_fields; m++) {
-				int x = config->defs[m].field_index;
-				if (rule->wildness[x] < wild_limit[m]) {
-					move = 0;
-					break;
-				}
-			}
+		/* Trie is getting too big, split remaining rule set. */
+		rule_sets[num_tries] = last->next;
+		last->next = NULL;
+		acl_free_node(context, context->bld_tries[n].trie);
 
-			if (move) {
-				rule->config = new_config;
-				rule->next = rule_sets[num_tries];
-				rule_sets[num_tries] = rule;
-				*prev = next;
-			} else
-				prev = &rule->next;
+		/* Create a new copy of config for remaining rules. */
+		config = acl_build_alloc(context, 1, sizeof(*config));
+		if (config == NULL) {
+			RTE_LOG(ERR, ACL,
+				"New config allocation for %u-th "
+				"trie failed\n", num_tries);
+			return -ENOMEM;
 		}
 
-		head = rule_sets[num_tries];
-		n = acl_rule_stats(rule_sets[num_tries], config,
-			&wild_limit[0]);
-		num_tries++;
-	}
-
-	if (n > 0)
-		RTE_LOG(DEBUG, ACL,
-			"Number of tries(%d) exceeded.\n", RTE_ACL_MAX_TRIES);
+		memcpy(config, rule_sets[n]->config, sizeof(*config));
 
-	for (n = 0; n < num_tries; n++) {
+		/* Make remaining rules use new config. */
+		for (head = rule_sets[num_tries]; head != NULL;
+				head = head->next)
+			head->config = config;
 
-		rule_sets[n] = sort_rules(rule_sets[n]);
-		context->tries[n].type = RTE_ACL_FULL_TRIE;
-		context->tries[n].count = 0;
-		context->tries[n].num_data_indexes =
-			acl_build_index(rule_sets[n]->config,
-			context->data_indexes[n]);
-		context->tries[n].data_index = context->data_indexes[n];
-
-		context->bld_tries[n].trie =
-				build_trie(context, rule_sets[n],
-				&last, &context->tries[n].count);
-		if (context->bld_tries[n].trie == NULL) {
+		/* Rebuild the trie for the reduced rule-set. */
+		last = build_one_trie(context, rule_sets, n);
+		if (context->bld_tries[n].trie == NULL || last != NULL) {
 			RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
 			return -ENOMEM;
 		}
 
-		if (last != NULL) {
-			rule_sets[num_tries++] = last->next;
-			last->next = NULL;
-			acl_free_node(context, context->bld_tries[n].trie);
-			context->tries[n].count = 0;
-
-			context->bld_tries[n].trie =
-					build_trie(context, rule_sets[n],
-					&last, &context->tries[n].count);
-			if (context->bld_tries[n].trie == NULL) {
-				RTE_LOG(ERR, ACL,
-					"Build of %u-th trie failed\n", n);
-					return -ENOMEM;
-			}
-		}
 	}
 
 	context->num_tries = num_tries;
@@ -1876,15 +1790,18 @@ acl_build_log(const struct acl_build_context *ctx)
 	uint32_t n;
 
 	RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+		"nodes created: %u\n"
 		"memory consumed: %zu\n",
 		ctx->acx->name,
+		ctx->num_nodes,
 		ctx->pool.alloc);
 
 	for (n = 0; n < RTE_DIM(ctx->tries); n++) {
 		if (ctx->tries[n].count != 0)
 			RTE_LOG(DEBUG, ACL,
-				"trie %u: number of rules: %u\n",
-				n, ctx->tries[n].count);
+				"trie %u: number of rules: %u, indexes: %u\n",
+				n, ctx->tries[n].count,
+				ctx->tries[n].num_data_indexes);
 	}
 }
 
-- 
1.8.5.3

  parent reply	other threads:[~2015-01-12 19:16 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-12 19:16 [dpdk-dev] [PATCH v2 00/17] ACL: New AVX2 classify method and several other enhancements Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 01/17] fix fix compilation issues with RTE_LIBRTE_ACL_STANDALONE=y Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 02/17] app/test: few small fixes fot test_acl.c Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 03/17] librte_acl: make data_indexes long enough to survive idle transitions Konstantin Ananyev
2015-01-12 19:16 ` Konstantin Ananyev [this message]
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 05/17] librte_acl: fix a bug at build phase that can cause matches beeing overwirtten Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 06/17] librte_acl: introduce DFA nodes compression (group64) for identical entries Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 07/17] librte_acl: build/gen phase - simplify the way match nodes are allocated Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 08/17] librte_acl: make scalar RT code to be more similar to vector one Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 09/17] librte_acl: a bit of RT code deduplication Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 10/17] EAL: introduce rte_ymm and relatives in rte_common_vect.h Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 11/17] librte_acl: add AVX2 as new rte_acl_classify() method Konstantin Ananyev
2015-01-19 17:22   ` Thomas Monjalon
2015-01-20 10:56     ` Ananyev, Konstantin
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 12/17] test-acl: add ability to manually select RT method Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 13/17] librte_acl: Remove search_sse_2 and relatives Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 14/17] libter_acl: move lo/hi dwords shuffle out from calc_addr Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 15/17] libte_acl: make calc_addr a define to deduplicate the code Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 16/17] libte_acl: introduce max_size into rte_acl_config Konstantin Ananyev
2015-01-12 19:16 ` [dpdk-dev] [PATCH v2 17/17] libte_acl: remove unused macros Konstantin Ananyev
2015-01-19 17:17   ` Thomas Monjalon
2015-01-20 10:09     ` Ananyev, Konstantin
2015-01-20 10:48       ` Jim Thompson
     [not found]         ` <2601191342CEEE43887BDE71AB977258213DE0BB@irsmsx105.ger.corp.intel.com>
2015-01-20 11:11           ` Ananyev, Konstantin
2015-01-20 12:26       ` Thomas Monjalon
2015-01-14 18:39 ` [dpdk-dev] [PATCH v2 00/17] ACL: New AVX2 classify method and several other enhancements Neil Horman
2015-01-19 17:16   ` Thomas Monjalon
2015-01-19 18:39     ` Neil Horman
2015-01-20 10:11     ` Ananyev, Konstantin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1421090181-17150-5-git-send-email-konstantin.ananyev@intel.com \
    --to=konstantin.ananyev@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).