From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 04/17] librte_acl: remove build phase heuristic with negative performance effect
Date: Mon, 12 Jan 2015 19:16:08 +0000
Message-ID: <1421090181-17150-5-git-send-email-konstantin.ananyev@intel.com>
In-Reply-To: <1421090181-17150-1-git-send-email-konstantin.ananyev@intel.com>
The current rule-wildness based heuristic can cause unnecessary splits of
the rule set.
That may have a negative performance effect:
more tries to traverse, bigger RT tables.
After removing it, a ~50% speedup was observed on some test cases with
big rule sets (~10K rules).
No difference was seen for smaller rule sets.
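To illustrate the new control flow, below is a minimal, self-contained
sketch of the split-on-overflow loop that acl_build_tries() now follows.
It is not library code: struct rule, toy_build_trie(), NODE_BUDGET and
MAX_TRIES are made-up stand-ins for the real build_one_trie(), the
node-count limit and RTE_ACL_MAX_TRIES. The point is that no
wildness-based pre-split is attempted any more; a trie is built over the
whole remaining (wildness-sorted) rule set, and only when the build
reports overflow are the rules that did not fit moved into the next trie.

    #include <stdio.h>
    #include <stddef.h>

    #define MAX_TRIES   8   /* stand-in for RTE_ACL_MAX_TRIES */
    #define NODE_BUDGET 4   /* toy limit standing in for the node-count cut-off */

    struct rule {
            int wildness;          /* toy stand-in for per-field wildness */
            struct rule *next;
    };

    /*
     * Toy stand-in for build_one_trie(): "consume" rules until the node
     * budget is exhausted.  Returns the last rule that still fit (so the
     * caller can split the list after it), or NULL when the whole set fit
     * into one trie.
     */
    static struct rule *
    toy_build_trie(struct rule *head)
    {
            unsigned int nodes = 0;
            struct rule *last = NULL;
            struct rule *r;

            for (r = head; r != NULL; r = r->next) {
                    if (++nodes > NODE_BUDGET)
                            return last;    /* trie got too big, split here */
                    last = r;
            }
            return NULL;                    /* everything fit */
    }

    int
    main(void)
    {
            struct rule pool[10];
            struct rule *sets[MAX_TRIES];
            struct rule *last;
            unsigned int n, num_tries;

            /* chain ten toy rules into one list */
            for (n = 0; n != 10; n++) {
                    pool[n].wildness = (int)n;
                    pool[n].next = (n + 1 != 10) ? &pool[n + 1] : NULL;
            }
            sets[0] = pool;

            /* split-on-overflow loop, shaped like the new acl_build_tries() */
            for (n = 0;; n = num_tries) {
                    num_tries = n + 1;

                    last = toy_build_trie(sets[n]);
                    if (last == NULL)
                            break;                  /* last trie completed */

                    if (num_tries == MAX_TRIES)
                            return 1;               /* ran out of tries */

                    /* move the rules that did not fit into the next trie ... */
                    sets[num_tries] = last->next;
                    last->next = NULL;
                    /* ... and rebuild the current trie for the reduced set */
                    (void)toy_build_trie(sets[n]);
            }

            printf("built %u tries\n", num_tries);
            return 0;
    }

In the real code the rebuild step additionally frees the oversized trie
and clones the config for the remaining rules, as shown in the diff below.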
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
lib/librte_acl/acl_bld.c | 277 +++++++++++++++++------------------------------
1 file changed, 97 insertions(+), 180 deletions(-)
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index c5a674a..8bf4a54 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -1539,11 +1539,9 @@ acl_calc_wildness(struct rte_acl_build_rule *head,
return 0;
}
-static int
-acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
- uint32_t *wild_limit)
+static void
+acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config)
{
- int min;
struct rte_acl_build_rule *rule;
uint32_t n, m, fields_deactivated = 0;
uint32_t start = 0, deactivate = 0;
@@ -1604,129 +1602,58 @@ acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
for (k = 0; k < config->num_fields; k++) {
if (tally[k][TALLY_DEACTIVATED] == 0) {
- memcpy(&tally[l][0], &tally[k][0],
+ memmove(&tally[l][0], &tally[k][0],
TALLY_NUM * sizeof(tally[0][0]));
- memcpy(&config->defs[l++],
+ memmove(&config->defs[l++],
&config->defs[k],
sizeof(struct rte_acl_field_def));
}
}
config->num_fields = l;
}
-
- min = RTE_ACL_SINGLE_TRIE_SIZE;
- if (config->num_fields == 2)
- min *= 4;
- else if (config->num_fields == 3)
- min *= 3;
- else if (config->num_fields == 4)
- min *= 2;
-
- if (tally[0][TALLY_0] < min)
- return 0;
- for (n = 0; n < config->num_fields; n++)
- wild_limit[n] = 0;
-
- /*
- * If trailing fields are 100% wild, group those together.
- * This allows the search length of the trie to be shortened.
- */
- for (n = 1; n < config->num_fields; n++) {
-
- double rule_percentage = (double)tally[n][TALLY_DEPTH] /
- tally[n][0];
-
- if (rule_percentage > RULE_PERCENTAGE) {
- /* if it crosses an input boundary then round up */
- while (config->defs[n - 1].input_index ==
- config->defs[n].input_index)
- n++;
-
- /* set the limit for selecting rules */
- while (n < config->num_fields)
- wild_limit[n++] = 100;
-
- if (wild_limit[n - 1] == 100)
- return 1;
- }
- }
-
- /* look for the most wild that's 40% or more of the rules */
- for (n = 1; n < config->num_fields; n++) {
- for (m = TALLY_100; m > 0; m--) {
-
- double rule_percentage = (double)tally[n][m] /
- tally[n][0];
-
- if (tally[n][TALLY_DEACTIVATED] == 0 &&
- tally[n][TALLY_0] >
- RTE_ACL_SINGLE_TRIE_SIZE &&
- rule_percentage > NODE_PERCENTAGE &&
- rule_percentage < 0.80) {
- wild_limit[n] = wild_limits[m];
- return 1;
- }
- }
- }
- return 0;
}
static int
-order(struct rte_acl_build_rule **insert, struct rte_acl_build_rule *rule)
+rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2)
{
uint32_t n;
- struct rte_acl_build_rule *left = *insert;
-
- if (left == NULL)
- return 0;
- for (n = 1; n < left->config->num_fields; n++) {
- int field_index = left->config->defs[n].field_index;
+ for (n = 1; n < r1->config->num_fields; n++) {
+ int field_index = r1->config->defs[n].field_index;
- if (left->wildness[field_index] != rule->wildness[field_index])
- return (left->wildness[field_index] >=
- rule->wildness[field_index]);
+ if (r1->wildness[field_index] != r2->wildness[field_index])
+ return (r1->wildness[field_index] -
+ r2->wildness[field_index]);
}
return 0;
}
static struct rte_acl_build_rule *
-ordered_insert_rule(struct rte_acl_build_rule *head,
- struct rte_acl_build_rule *rule)
-{
- struct rte_acl_build_rule **insert;
-
- if (rule == NULL)
- return head;
-
- rule->next = head;
- if (head == NULL)
- return rule;
-
- insert = &head;
- while (order(insert, rule))
- insert = &(*insert)->next;
-
- rule->next = *insert;
- *insert = rule;
- return head;
-}
-
-static struct rte_acl_build_rule *
sort_rules(struct rte_acl_build_rule *head)
{
- struct rte_acl_build_rule *rule, *reordered_head = NULL;
- struct rte_acl_build_rule *last_rule = NULL;
-
- for (rule = head; rule != NULL; rule = rule->next) {
- reordered_head = ordered_insert_rule(reordered_head, last_rule);
- last_rule = rule;
+ struct rte_acl_build_rule *new_head;
+ struct rte_acl_build_rule *l, *r, **p;
+
+ new_head = NULL;
+ while (head != NULL) {
+ r = head;
+ head = r->next;
+ r->next = NULL;
+ if (new_head == NULL) {
+ new_head = r;
+ } else {
+ for (p = &new_head;
+ (l = *p) != NULL &&
+ rule_cmp_wildness(l, r) >= 0;
+ p = &l->next)
+ ;
+
+ r->next = *p;
+ *p = r;
+ }
}
- if (last_rule != reordered_head)
- reordered_head = ordered_insert_rule(reordered_head, last_rule);
-
- return reordered_head;
+ return new_head;
}
static uint32_t
@@ -1748,21 +1675,44 @@ acl_build_index(const struct rte_acl_config *config, uint32_t *data_index)
return m;
}
+static struct rte_acl_build_rule *
+build_one_trie(struct acl_build_context *context,
+ struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES],
+ uint32_t n)
+{
+ struct rte_acl_build_rule *last;
+ struct rte_acl_config *config;
+
+ config = rule_sets[n]->config;
+
+ acl_rule_stats(rule_sets[n], config);
+ rule_sets[n] = sort_rules(rule_sets[n]);
+
+ context->tries[n].type = RTE_ACL_FULL_TRIE;
+ context->tries[n].count = 0;
+
+ context->tries[n].num_data_indexes = acl_build_index(config,
+ context->data_indexes[n]);
+ context->tries[n].data_index = context->data_indexes[n];
+
+ context->bld_tries[n].trie = build_trie(context, rule_sets[n],
+ &last, &context->tries[n].count);
+
+ return last;
+}
+
static int
acl_build_tries(struct acl_build_context *context,
struct rte_acl_build_rule *head)
{
int32_t rc;
- uint32_t n, m, num_tries;
+ uint32_t n, num_tries;
struct rte_acl_config *config;
- struct rte_acl_build_rule *last, *rule;
- uint32_t wild_limit[RTE_ACL_MAX_LEVELS];
+ struct rte_acl_build_rule *last;
struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES];
config = head->config;
- rule = head;
rule_sets[0] = head;
- num_tries = 1;
/* initialize tries */
for (n = 0; n < RTE_DIM(context->tries); n++) {
@@ -1779,91 +1729,55 @@ acl_build_tries(struct acl_build_context *context,
if (rc != 0)
return rc;
- n = acl_rule_stats(head, config, &wild_limit[0]);
-
- /* put all rules that fit the wildness criteria into a seperate trie */
- while (n > 0 && num_tries < RTE_ACL_MAX_TRIES) {
+ for (n = 0;; n = num_tries) {
- struct rte_acl_config *new_config;
- struct rte_acl_build_rule **prev = &rule_sets[num_tries - 1];
- struct rte_acl_build_rule *next = head->next;
+ num_tries = n + 1;
- new_config = acl_build_alloc(context, 1, sizeof(*new_config));
- if (new_config == NULL) {
- RTE_LOG(ERR, ACL,
- "Failed to get space for new config\n");
+ last = build_one_trie(context, rule_sets, n);
+ if (context->bld_tries[n].trie == NULL) {
+ RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
return -ENOMEM;
}
- memcpy(new_config, config, sizeof(*new_config));
- config = new_config;
- rule_sets[num_tries] = NULL;
-
- for (rule = head; rule != NULL; rule = next) {
+ /* Build of the last trie completed. */
+ if (last == NULL)
+ break;
- int move = 1;
+ if (num_tries == RTE_DIM(context->tries)) {
+ RTE_LOG(ERR, ACL,
+ "Exceeded max number of tries: %u\n",
+ num_tries);
+ return -ENOMEM;
+ }
- next = rule->next;
- for (m = 0; m < config->num_fields; m++) {
- int x = config->defs[m].field_index;
- if (rule->wildness[x] < wild_limit[m]) {
- move = 0;
- break;
- }
- }
+ /* Trie is getting too big, split remaining rule set. */
+ rule_sets[num_tries] = last->next;
+ last->next = NULL;
+ acl_free_node(context, context->bld_tries[n].trie);
- if (move) {
- rule->config = new_config;
- rule->next = rule_sets[num_tries];
- rule_sets[num_tries] = rule;
- *prev = next;
- } else
- prev = &rule->next;
+ /* Create a new copy of config for remaining rules. */
+ config = acl_build_alloc(context, 1, sizeof(*config));
+ if (config == NULL) {
+ RTE_LOG(ERR, ACL,
+ "New config allocation for %u-th "
+ "trie failed\n", num_tries);
+ return -ENOMEM;
}
- head = rule_sets[num_tries];
- n = acl_rule_stats(rule_sets[num_tries], config,
- &wild_limit[0]);
- num_tries++;
- }
-
- if (n > 0)
- RTE_LOG(DEBUG, ACL,
- "Number of tries(%d) exceeded.\n", RTE_ACL_MAX_TRIES);
+ memcpy(config, rule_sets[n]->config, sizeof(*config));
- for (n = 0; n < num_tries; n++) {
+ /* Make remaining rules use new config. */
+ for (head = rule_sets[num_tries]; head != NULL;
+ head = head->next)
+ head->config = config;
- rule_sets[n] = sort_rules(rule_sets[n]);
- context->tries[n].type = RTE_ACL_FULL_TRIE;
- context->tries[n].count = 0;
- context->tries[n].num_data_indexes =
- acl_build_index(rule_sets[n]->config,
- context->data_indexes[n]);
- context->tries[n].data_index = context->data_indexes[n];
-
- context->bld_tries[n].trie =
- build_trie(context, rule_sets[n],
- &last, &context->tries[n].count);
- if (context->bld_tries[n].trie == NULL) {
+ /* Rebuild the trie for the reduced rule-set. */
+ last = build_one_trie(context, rule_sets, n);
+ if (context->bld_tries[n].trie == NULL || last != NULL) {
RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
return -ENOMEM;
}
- if (last != NULL) {
- rule_sets[num_tries++] = last->next;
- last->next = NULL;
- acl_free_node(context, context->bld_tries[n].trie);
- context->tries[n].count = 0;
-
- context->bld_tries[n].trie =
- build_trie(context, rule_sets[n],
- &last, &context->tries[n].count);
- if (context->bld_tries[n].trie == NULL) {
- RTE_LOG(ERR, ACL,
- "Build of %u-th trie failed\n", n);
- return -ENOMEM;
- }
- }
}
context->num_tries = num_tries;
@@ -1876,15 +1790,18 @@ acl_build_log(const struct acl_build_context *ctx)
uint32_t n;
RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+ "nodes created: %u\n"
"memory consumed: %zu\n",
ctx->acx->name,
+ ctx->num_nodes,
ctx->pool.alloc);
for (n = 0; n < RTE_DIM(ctx->tries); n++) {
if (ctx->tries[n].count != 0)
RTE_LOG(DEBUG, ACL,
- "trie %u: number of rules: %u\n",
- n, ctx->tries[n].count);
+ "trie %u: number of rules: %u, indexes: %u\n",
+ n, ctx->tries[n].count,
+ ctx->tries[n].num_data_indexes);
}
}
--
1.8.5.3