DPDK patches and discussions
* [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions
@ 2018-07-11 17:53 Alex Kiselev
  0 siblings, 0 replies; 6+ messages in thread
From: Alex Kiselev @ 2018-07-11 17:53 UTC (permalink / raw)
  To: dev, Bruce Richardson

librte_lpm: Improve lpm6 performance

Rework the lpm6 rule subsystem and replace
the current O(n) rules algorithm with hash
tables, which allow dealing with large (50k)
rule sets.

Signed-off-by: Alex Kiselev <alex@therouter.net>
---
 lib/Makefile               |   2 +-
 lib/librte_lpm/meson.build |   1 +
 lib/librte_lpm/rte_lpm6.c  | 406 ++++++++++++++++++++++++++++-----------------
 3 files changed, 255 insertions(+), 154 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index d82462ba2..79aeaa04f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,7 +47,7 @@ DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
 DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
-DEPDIRS-librte_lpm := librte_eal
+DEPDIRS-librte_lpm := librte_eal librte_mempool librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DEPDIRS-librte_acl := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member
diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
index 067849427..fcb5bed2c 100644
--- a/lib/librte_lpm/meson.build
+++ b/lib/librte_lpm/meson.build
@@ -7,3 +7,4 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
 headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+deps += ['mempool', 'hash']
diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
index 149677eb1..d40eeb043 100644
--- a/lib/librte_lpm/rte_lpm6.c
+++ b/lib/librte_lpm/rte_lpm6.c
@@ -21,6 +21,10 @@
 #include <rte_errno.h>
 #include <rte_rwlock.h>
 #include <rte_spinlock.h>
+#include <rte_hash.h>
+#include <rte_mempool.h>
+#include <assert.h>
+#include <rte_jhash.h>
 
 #include "rte_lpm6.h"
 
@@ -37,6 +41,8 @@
 #define BYTE_SIZE                                 8
 #define BYTES2_SIZE                              16
 
+#define RULE_HASH_TABLE_EXTRA_SPACE              64
+
 #define lpm6_tbl8_gindex next_hop
 
 /** Flags for setting an entry as valid/invalid. */
@@ -70,6 +76,12 @@ struct rte_lpm6_rule {
 	uint8_t depth; /**< Rule depth. */
 };
 
+/** Rules tbl entry key. */
+struct rte_lpm6_rule_key {
+	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
+	uint8_t depth; /**< Rule depth. */
+};
+
 /** LPM6 structure. */
 struct rte_lpm6 {
 	/* LPM metadata. */
@@ -80,7 +92,8 @@ struct rte_lpm6 {
 	uint32_t next_tbl8;              /**< Next tbl8 to be used. */
 
 	/* LPM Tables. */
-	struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */
+	struct rte_mempool *rules_pool; /**< LPM rules mempool. */
+	struct rte_hash *rules_tbl; /**< LPM rules. */
 	struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
 			__rte_cache_aligned; /**< LPM tbl24 table. */
 	struct rte_lpm6_tbl_entry tbl8[0]
@@ -93,22 +106,81 @@ struct rte_lpm6 {
  * and set the rest to 0.
  */
 static inline void
-mask_ip(uint8_t *ip, uint8_t depth)
+ip6_mask_addr(uint8_t *ip, uint8_t depth)
 {
-        int16_t part_depth, mask;
-        int i;
+	int16_t part_depth, mask;
+	int i;
 
-		part_depth = depth;
+	part_depth = depth;
 
-		for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
-			if (part_depth < BYTE_SIZE && part_depth >= 0) {
-				mask = (uint16_t)(~(UINT8_MAX >> part_depth));
-				ip[i] = (uint8_t)(ip[i] & mask);
-			} else if (part_depth < 0) {
-				ip[i] = 0;
-			}
-			part_depth -= BYTE_SIZE;
-		}
+	for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
+		if (part_depth < BYTE_SIZE && part_depth >= 0) {
+			mask = (uint16_t)(~(UINT8_MAX >> part_depth));
+			ip[i] = (uint8_t)(ip[i] & mask);
+		} else if (part_depth < 0)
+			ip[i] = 0;
+
+		part_depth -= BYTE_SIZE;
+	}
+}
+
+/* copy ipv6 address */
+static inline void
+ip6_copy_addr(uint8_t *dst, const uint8_t *src)
+{
+	rte_memcpy(dst, src, RTE_LPM6_IPV6_ADDR_SIZE);
+}
+
+/*
+ * LPM6 rule hash function
+ *
+ * It's used as a hash function for the rte_hash
+ *	containing rules
+ */
+static inline uint32_t
+rule_hash(const void *data, __rte_unused uint32_t data_len,
+		  uint32_t init_val)
+{
+	return rte_jhash(data, sizeof(struct rte_lpm6_rule_key), init_val);
+}
+
+/*
+ * Init a rule key.
+ *	  note that ip must be already masked
+ */
+static inline void
+rule_key_init(struct rte_lpm6_rule_key *key, uint8_t *ip, uint8_t depth)
+{
+	ip6_copy_addr(key->ip, ip);
+	key->depth = depth;
+}
+
+/*
+ * Rebuild the entire LPM tree by reinserting all rules
+ */
+static void
+rebuild_lpm(struct rte_lpm6 *lpm)
+{
+	struct rte_lpm6_rule *rule;
+	struct rte_lpm6_rule *rule_key;
+	uint32_t iter = 0;
+	while (rte_hash_iterate(lpm->rules_tbl, (void *) &rule_key,
+			(void **) &rule, &iter) >= 0)
+		rte_lpm6_add(lpm, rule->ip, rule->depth, rule->next_hop);
+}
+
+/*
+ *	Free all rules
+ */
+static void
+rules_free(struct rte_lpm6 *lpm)
+{
+	struct rte_lpm6_rule *rule;
+	struct rte_lpm6_rule *rule_key;
+	uint32_t iter = 0;
+	while (rte_hash_iterate(lpm->rules_tbl, (void *) &rule_key,
+			(void **) &rule, &iter) >= 0)
+		rte_mempool_put(lpm->rules_pool, rule);
 }
 
 /*
@@ -121,7 +193,7 @@ rte_lpm6_create(const char *name, int socket_id,
 	char mem_name[RTE_LPM6_NAMESIZE];
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_tailq_entry *te;
-	uint64_t mem_size, rules_size;
+	uint64_t mem_size;
 	struct rte_lpm6_list *lpm_list;
 
 	lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
@@ -136,12 +208,48 @@ rte_lpm6_create(const char *name, int socket_id,
 		return NULL;
 	}
 
+	struct rte_mempool *rules_mempool = NULL;
+	struct rte_hash *rules_tbl = NULL;
+
+	/* allocate rules mempool */
+	snprintf(mem_name, sizeof(mem_name), "LRM_%s", name);
+	rules_mempool = rte_mempool_create(mem_name,
+			config->max_rules, sizeof(struct rte_lpm6_rule), 0, 0,
+			NULL, NULL, NULL, NULL, socket_id,
+			MEMPOOL_F_NO_CACHE_ALIGN);
+	if (rules_mempool == NULL) {
+		RTE_LOG(ERR, LPM, "LPM rules mempool allocation failed: %s (%d)",
+				  rte_strerror(rte_errno), rte_errno);
+		rte_errno = ENOMEM;
+		goto fail_wo_unlock;
+	}
+
+	/* create rules hash table */
+	snprintf(mem_name, sizeof(mem_name), "LRH_%s", name);
+	struct rte_hash_parameters rule_hash_tbl_params = {
+		.entries = config->max_rules * 1.2 +
+			RULE_HASH_TABLE_EXTRA_SPACE,
+		.key_len = sizeof(struct rte_lpm6_rule_key),
+		.hash_func = rule_hash,
+		.hash_func_init_val = 0,
+		.name = mem_name,
+		.reserved = 0,
+		.socket_id = socket_id,
+		.extra_flag = 0
+	};
+
+	rules_tbl = rte_hash_create(&rule_hash_tbl_params);
+	if (rules_tbl == NULL) {
+		RTE_LOG(ERR, LPM, "LPM rules hash table allocation failed: %s (%d)",
+				  rte_strerror(rte_errno), rte_errno);
+		goto fail_wo_unlock;
+	}
+
 	snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
 
 	/* Determine the amount of memory to allocate. */
 	mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) *
 			RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
-	rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules;
 
 	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
 
@@ -154,7 +262,7 @@ rte_lpm6_create(const char *name, int socket_id,
 	lpm = NULL;
 	if (te != NULL) {
 		rte_errno = EEXIST;
-		goto exit;
+		goto fail;
 	}
 
 	/* allocate tailq entry */
@@ -162,7 +270,7 @@ rte_lpm6_create(const char *name, int socket_id,
 	if (te == NULL) {
 		RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n");
 		rte_errno = ENOMEM;
-		goto exit;
+		goto fail;
 	}
 
 	/* Allocate memory to store the LPM data structures. */
@@ -173,19 +281,7 @@ rte_lpm6_create(const char *name, int socket_id,
 		RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
 		rte_free(te);
 		rte_errno = ENOMEM;
-		goto exit;
-	}
-
-	lpm->rules_tbl = rte_zmalloc_socket(NULL,
-			(size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
-
-	if (lpm->rules_tbl == NULL) {
-		RTE_LOG(ERR, LPM, "LPM rules_tbl allocation failed\n");
-		rte_free(lpm);
-		lpm = NULL;
-		rte_free(te);
-		rte_errno = ENOMEM;
-		goto exit;
+		goto fail;
 	}
 
 	/* Save user arguments. */
@@ -193,14 +289,23 @@ rte_lpm6_create(const char *name, int socket_id,
 	lpm->number_tbl8s = config->number_tbl8s;
 	snprintf(lpm->name, sizeof(lpm->name), "%s", name);
 
+	lpm->rules_tbl = rules_tbl;
+	lpm->rules_pool = rules_mempool;
+
 	te->data = (void *) lpm;
 
 	TAILQ_INSERT_TAIL(lpm_list, te, next);
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+	return lpm;
 
-exit:
+fail:
 	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
-	return lpm;
+fail_wo_unlock:
+	rte_mempool_free(rules_mempool);
+	rte_hash_free(rules_tbl);
+
+	return NULL;
 }
 
 /*
@@ -259,50 +364,93 @@ rte_lpm6_free(struct rte_lpm6 *lpm)
 
 	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
-	rte_free(lpm->rules_tbl);
+	rte_mempool_free(lpm->rules_pool);
+	rte_hash_free(lpm->rules_tbl);
 	rte_free(lpm);
 	rte_free(te);
 }
 
+/* Find a rule */
+static inline struct rte_lpm6_rule*
+rule_find_with_key(struct rte_lpm6 *lpm,
+		  const struct rte_lpm6_rule_key *rule_key)
+{
+	/* lookup for a rule */
+	struct rte_lpm6_rule	*rule;
+	int ret = rte_hash_lookup_data(lpm->rules_tbl,
+		(const void *) rule_key, (void **) &rule);
+	return (ret >= 0) ? rule : NULL;
+}
+
+/* Find a rule */
+static struct rte_lpm6_rule*
+rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
+{
+	/* init a rule key */
+	struct rte_lpm6_rule_key rule_key;
+	rule_key_init(&rule_key, ip, depth);
+
+	return rule_find_with_key(lpm, &rule_key);
+}
+
 /*
  * Checks if a rule already exists in the rules table and updates
  * the nexthop if so. Otherwise it adds a new rule if enough space is available.
+ *
+ * Returns:
+ *    0 - next hop of existing rule is updated
+ *    1 - new rule successfully added
+ *   <0 - error
  */
-static inline int32_t
-rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth)
+static inline int
+rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop)
 {
-	uint32_t rule_index;
+	struct rte_lpm6_rule *rule;
 
-	/* Scan through rule list to see if rule already exists. */
-	for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
+	/* init a rule key */
+	struct rte_lpm6_rule_key rule_key;
+	rule_key_init(&rule_key, ip, depth);
 
-		/* If rule already exists update its next_hop and return. */
-		if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
-				RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
-				lpm->rules_tbl[rule_index].depth == depth) {
-			lpm->rules_tbl[rule_index].next_hop = next_hop;
+	/* Scan through rule list to see if rule already exists. */
+	rule = rule_find_with_key(lpm, &rule_key);
 
-			return rule_index;
-		}
+	/* If rule already exists update its next_hop and return. */
+	if (rule != NULL) {
+		rule->next_hop = next_hop;
+		return 0;
 	}
 
 	/*
 	 * If rule does not exist check if there is space to add a new rule to
 	 * this rule group. If there is no space return error.
 	 */
-	if (lpm->used_rules == lpm->max_rules) {
+	if (lpm->used_rules == lpm->max_rules)
 		return -ENOSPC;
-	}
 
-	/* If there is space for the new rule add it. */
-	rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-	lpm->rules_tbl[rule_index].next_hop = next_hop;
-	lpm->rules_tbl[rule_index].depth = depth;
+	/*
+	 * If there is space for the new rule add it.
+	 */
+
+	/* get a new rule */
+	int ret = rte_mempool_get(lpm->rules_pool, (void **) &rule);
+	if (ret < 0)
+		return ret;
+
+	/* init the rule */
+	rule->depth = depth;
+	ip6_copy_addr(rule->ip, ip);
+	rule->next_hop = next_hop;
+
+	/* add the rule */
+	ret = rte_hash_add_key_data(lpm->rules_tbl, &rule_key, rule);
+	if (ret < 0) {
+		rte_mempool_put(lpm->rules_pool, rule);
+		return ret;
+	}
 
 	/* Increment the used rules counter for this rule group. */
 	lpm->used_rules++;
-
-	return rule_index;
+	return 1;
 }
 
 /*
@@ -492,7 +640,7 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
 {
 	struct rte_lpm6_tbl_entry *tbl;
 	struct rte_lpm6_tbl_entry *tbl_next = NULL;
-	int32_t rule_index;
+	int32_t ret;
 	int status;
 	uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
 	int i;
@@ -502,16 +650,14 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
 		return -EINVAL;
 
 	/* Copy the IP and mask it to avoid modifying user's input data. */
-	memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-	mask_ip(masked_ip, depth);
+	ip6_copy_addr(masked_ip, ip);
+	ip6_mask_addr(masked_ip, depth);
 
 	/* Add the rule to the rule table. */
-	rule_index = rule_add(lpm, masked_ip, next_hop, depth);
-
+	ret = rule_add(lpm, masked_ip, depth, next_hop);
 	/* If there is no space available for new rule return error. */
-	if (rule_index < 0) {
-		return rule_index;
-	}
+	if (ret < 0)
+		return ret;
 
 	/* Inspect the first three bytes through tbl24 on the first step. */
 	tbl = lpm->tbl24;
@@ -724,30 +870,6 @@ MAP_STATIC_SYMBOL(int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
 				int32_t *next_hops, unsigned int n),
 		rte_lpm6_lookup_bulk_func_v1705);
 
-/*
- * Finds a rule in rule table.
- * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
- */
-static inline int32_t
-rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
-{
-	uint32_t rule_index;
-
-	/* Scan used rules at given depth to find rule. */
-	for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
-		/* If rule is found return the rule index. */
-		if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
-				RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
-				lpm->rules_tbl[rule_index].depth == depth) {
-
-			return rule_index;
-		}
-	}
-
-	/* If rule is not found return -ENOENT. */
-	return -ENOENT;
-}
-
 /*
  * Look for a rule in the high-level rules table
  */
@@ -775,8 +897,7 @@ int
 rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
 		uint32_t *next_hop)
 {
-	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
-	int32_t rule_index;
+	uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
 
 	/* Check user arguments. */
 	if ((lpm == NULL) || next_hop == NULL || ip == NULL ||
@@ -784,14 +905,13 @@ rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
 		return -EINVAL;
 
 	/* Copy the IP and mask it to avoid modifying user's input data. */
-	memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-	mask_ip(ip_masked, depth);
+	ip6_copy_addr(masked_ip, ip);
+	ip6_mask_addr(masked_ip, depth);
 
-	/* Look for the rule using rule_find. */
-	rule_index = rule_find(lpm, ip_masked, depth);
+	struct rte_lpm6_rule *rule = rule_find(lpm, masked_ip, depth);
 
-	if (rule_index >= 0) {
-		*next_hop = lpm->rules_tbl[rule_index].next_hop;
+	if (rule != NULL) {
+		*next_hop = rule->next_hop;
 		return 1;
 	}
 
@@ -806,16 +926,29 @@ MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm,
 /*
  * Delete a rule from the rule table.
  * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
+ * return
+ *	  0 on success
+ *   <0 on failure
  */
-static inline void
-rule_delete(struct rte_lpm6 *lpm, int32_t rule_index)
+static inline int
+rule_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
 {
-	/*
-	 * Overwrite redundant rule with last rule in group and decrement rule
-	 * counter.
-	 */
-	lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1];
-	lpm->used_rules--;
+	/* init a rule key */
+	struct rte_lpm6_rule_key rule_key;
+	rule_key_init(&rule_key, ip, depth);
+
+	/* Lookup for a rule */
+	struct rte_lpm6_rule *rule;
+	int ret = rte_hash_lookup_data(lpm->rules_tbl, (void *) &rule_key,
+		(void **) &rule);
+	if (ret >= 0) {
+		/* delete the rule */
+		rte_hash_del_key(lpm->rules_tbl, (void *) &rule_key);
+		lpm->used_rules--;
+		rte_mempool_put(lpm->rules_pool, rule);
+	}
+
+	return ret;
 }
 
 /*
@@ -824,9 +957,8 @@ rule_delete(struct rte_lpm6 *lpm, int32_t rule_index)
 int
 rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
 {
-	int32_t rule_to_delete_index;
-	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
-	unsigned i;
+	uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+	int ret;
 
 	/*
 	 * Check input arguments.
@@ -836,24 +968,13 @@ rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
 	}
 
 	/* Copy the IP and mask it to avoid modifying user's input data. */
-	memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-	mask_ip(ip_masked, depth);
-
-	/*
-	 * Find the index of the input rule, that needs to be deleted, in the
-	 * rule table.
-	 */
-	rule_to_delete_index = rule_find(lpm, ip_masked, depth);
-
-	/*
-	 * Check if rule_to_delete_index was found. If no rule was found the
-	 * function rule_find returns -ENOENT.
-	 */
-	if (rule_to_delete_index < 0)
-		return rule_to_delete_index;
+	ip6_copy_addr(masked_ip, ip);
+	ip6_mask_addr(masked_ip, depth);
 
 	/* Delete the rule from the rule table. */
-	rule_delete(lpm, rule_to_delete_index);
+	ret = rule_delete(lpm, masked_ip, depth);
+	if (ret < 0)
+		return -ENOENT;
 
 	/*
 	 * Set all the table entries to 0 (ie delete every rule
@@ -868,10 +989,7 @@ rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
 	 * Add every rule again (except for the one that was removed from
 	 * the rules table).
 	 */
-	for (i = 0; i < lpm->used_rules; i++) {
-		rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
-				lpm->rules_tbl[i].next_hop);
-	}
+	rebuild_lpm(lpm);
 
 	return 0;
 }
@@ -883,37 +1001,19 @@ int
 rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
 		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n)
 {
-	int32_t rule_to_delete_index;
-	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
+	uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
 	unsigned i;
 
 	/*
 	 * Check input arguments.
 	 */
-	if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) {
+	if ((lpm == NULL) || (ips == NULL) || (depths == NULL))
 		return -EINVAL;
-	}
 
 	for (i = 0; i < n; i++) {
-		/* Copy the IP and mask it to avoid modifying user's input data. */
-		memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE);
-		mask_ip(ip_masked, depths[i]);
-
-		/*
-		 * Find the index of the input rule, that needs to be deleted, in the
-		 * rule table.
-		 */
-		rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]);
-
-		/*
-		 * Check if rule_to_delete_index was found. If no rule was found the
-		 * function rule_find returns -ENOENT.
-		 */
-		if (rule_to_delete_index < 0)
-			continue;
-
-		/* Delete the rule from the rule table. */
-		rule_delete(lpm, rule_to_delete_index);
+		ip6_copy_addr(masked_ip, ips[i]);
+		ip6_mask_addr(masked_ip, depths[i]);
+		rule_delete(lpm, masked_ip, depths[i]);
 	}
 
 	/*
@@ -929,10 +1029,7 @@ rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
 	 * Add every rule again (except for the ones that were removed from
 	 * the rules table).
 	 */
-	for (i = 0; i < lpm->used_rules; i++) {
-		rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
-				lpm->rules_tbl[i].next_hop);
-	}
+	rebuild_lpm(lpm);
 
 	return 0;
 }
@@ -956,6 +1053,9 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm)
 	memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) *
 			RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
 
+	/* put all rules back to the mempool */
+	rules_free(lpm);
+
 	/* Delete all rules form the rules table. */
-	memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules);
+	rte_hash_reset(lpm->rules_tbl);
 }
-- 
2.16.1.windows.1
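For reference, the control-plane API whose rule bookkeeping this patch reworks is
exercised roughly as in the sketch below; the sizes, prefix, name and socket id are
illustrative only, and rte_eal_init() is assumed to have run already.

#include <stdio.h>
#include <rte_lpm6.h>

static int
lpm6_usage_sketch(void)
{
	struct rte_lpm6_config cfg = {
		.max_rules = 50000,        /* the rule-set scale this patch targets */
		.number_tbl8s = (1 << 16),
		.flags = 0,
	};
	/* 2001:db8::/32 as 16 raw bytes, remaining bytes are zero */
	uint8_t prefix[RTE_LPM6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
	uint32_t next_hop = 0;
	struct rte_lpm6 *lpm;

	lpm = rte_lpm6_create("example", 0 /* socket id */, &cfg);
	if (lpm == NULL)
		return -1;

	/* rule_add() path: rule bookkeeping plus tbl24/tbl8 programming */
	if (rte_lpm6_add(lpm, prefix, 32, 100) < 0)
		goto out;

	/* exact-match rule lookup: with this patch, a hash lookup instead of a scan */
	if (rte_lpm6_is_rule_present(lpm, prefix, 32, &next_hop) == 1)
		printf("next hop %u\n", next_hop);

	/* rule_delete() path: hash delete followed by rebuild_lpm() */
	rte_lpm6_delete(lpm, prefix, 32);
out:
	rte_lpm6_free(lpm);
	return 0;
}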


* Re: [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions
  2018-07-16 17:34   ` Alex Kiselev
@ 2018-07-16 17:50     ` Stephen Hemminger
  0 siblings, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2018-07-16 17:50 UTC (permalink / raw)
  To: Alex Kiselev; +Cc: dev, Bruce Richardson

On Mon, 16 Jul 2018 20:34:45 +0300
Alex Kiselev <alex@therouter.net> wrote:

> > On Wed, 11 Jul 2018 20:53:46 +0300
> > Alex Kiselev <alex@therouter.net> wrote:  
> 
> >> librte_lpm: Improve lpm6 performance  
> 
> >> Rework the lpm6 rule subsystem and replace
> >> current rules algorithm complexity O(n)
> >> with hashtables which allow dealing with
> >> large (50k) rule sets.  
> 
> 
> > Wouldn't it make more sense to use something like tree, and use left/right
> > in the rules entry. That way the memory is spread and scales with the number
> > of rules.  
> I guess you are trying to propose using a radix tree. I agree, it uses memory
> more efficient than hashtable. But, it would require to add a new dpdk library implementing a
> radix tree, then we can talk about using it as a lpm rule storage.
> 

I solved this problem several years ago by using the standard BSD red-black tree.
This was used by Vyatta/Brocade/ATT and submitted to DPDK but never merged.

The BSD red-black tree is in the libbsd-dev package on Linux. The issue that
seemed to block merging was the dependency on additional packages; since DPDK
already depends on libnuma, I'm not sure why that was an issue.
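
As an illustration of that approach, keying the rules on (prefix, depth) with
<sys/tree.h> could look roughly like this (a hypothetical sketch, not the
tree-based patch that was submitted; all names here are invented):

#include <stdint.h>
#include <string.h>
#include <sys/tree.h>   /* provided by libbsd-dev on Linux */

#define LPM6_ADDR_SIZE 16

struct lpm6_rule_node {
	RB_ENTRY(lpm6_rule_node) entry; /* left/right/parent links live in the rule */
	uint8_t ip[LPM6_ADDR_SIZE];     /* masked prefix */
	uint8_t depth;
	uint32_t next_hop;
};

/* total order on (prefix, depth) gives O(log n) insert/find/delete */
static int
lpm6_rule_cmp(struct lpm6_rule_node *a, struct lpm6_rule_node *b)
{
	int r = memcmp(a->ip, b->ip, LPM6_ADDR_SIZE);

	return r != 0 ? r : (int)a->depth - (int)b->depth;
}

RB_HEAD(lpm6_rule_tree, lpm6_rule_node);
RB_GENERATE(lpm6_rule_tree, lpm6_rule_node, entry, lpm6_rule_cmp)

static struct lpm6_rule_tree rules = RB_INITIALIZER(&rules);

/* exact-match lookup, as needed by delete/is_rule_present */
static struct lpm6_rule_node *
lpm6_rule_find(const uint8_t *masked_ip, uint8_t depth)
{
	struct lpm6_rule_node key;

	memcpy(key.ip, masked_ip, LPM6_ADDR_SIZE);
	key.depth = depth;
	return RB_FIND(lpm6_rule_tree, &rules, &key);
}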


* Re: [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions
  2018-07-11 20:30 ` Stephen Hemminger
  2018-07-12  9:29   ` Alex Kiselev
  2018-07-16  8:10   ` Alex Kiselev
@ 2018-07-16 17:34   ` Alex Kiselev
  2018-07-16 17:50     ` Stephen Hemminger
  2 siblings, 1 reply; 6+ messages in thread
From: Alex Kiselev @ 2018-07-16 17:34 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Bruce Richardson


> On Wed, 11 Jul 2018 20:53:46 +0300
> Alex Kiselev <alex@therouter.net> wrote:

>> librte_lpm: Improve lpm6 performance

>> Rework the lpm6 rule subsystem and replace
>> current rules algorithm complexity O(n)
>> with hashtables which allow dealing with
>> large (50k) rule sets.


> Wouldn't it make more sense to use something like tree, and use left/right
> in the rules entry. That way the memory is spread and scales with the number
> of rules.
I guess you are proposing a radix tree. I agree, it uses memory more efficiently
than a hashtable. But it would require adding a new DPDK library implementing a
radix tree; then we could talk about using it as the LPM rule storage.

-- 
Alex


* Re: [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions
  2018-07-11 20:30 ` Stephen Hemminger
  2018-07-12  9:29   ` Alex Kiselev
@ 2018-07-16  8:10   ` Alex Kiselev
  2018-07-16 17:34   ` Alex Kiselev
  2 siblings, 0 replies; 6+ messages in thread
From: Alex Kiselev @ 2018-07-16  8:10 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Bruce Richardson



> Also. Please run checkpatch shell script on your patches.  For example, there
> should be blank line between declarations and code.
fixed in v5


-- 
Alex


* Re: [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions
  2018-07-11 20:30 ` Stephen Hemminger
@ 2018-07-12  9:29   ` Alex Kiselev
  2018-07-16  8:10   ` Alex Kiselev
  2018-07-16 17:34   ` Alex Kiselev
  2 siblings, 0 replies; 6+ messages in thread
From: Alex Kiselev @ 2018-07-12  9:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Bruce Richardson



> On Wed, 11 Jul 2018 20:53:46 +0300
> Alex Kiselev <alex@therouter.net> wrote:

>> librte_lpm: Improve lpm6 performance

...

>>  
>>       /* LPM Tables. */
>> -     struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */
>> +     struct rte_mempool *rules_pool; /**< LPM rules mempool. */
>> +     struct rte_hash *rules_tbl; /**< LPM rules. */
>>       struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
>>                       __rte_cache_aligned; /**< LPM tbl24 table. */
>>       struct rte_lpm6_tbl_entry tbl8[0]
>> @@ -93,22 +106,81 @@ struct rte_lpm6 {
>>   * and set the rest to 0.


> What is the increased memory overhead of having a hash table?
Compared to the current rules array it's about 2x, since a prefix is stored both in
a rule (mempool) and in a rule key (hashtable).
I am only talking here about the rule storage.

And I've just realised it doesn't have to be this way: I don't need the rules
mempool anymore. The rules hashtable alone could hold everything a rule needs:
the rule prefix is stored in the hash key, and the next hop index could be stored
in the hash value. That would eliminate the memory overhead.

I'll try it that way in the next patch series.
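
Roughly, the mempool-free rule store would collapse rule add/lookup to something
like this (just a sketch; the helper names are illustrative, not from the patch):

#include <stdint.h>
#include <rte_hash.h>

struct lpm6_rule_key {
	uint8_t ip[16]; /* masked prefix, RTE_LPM6_IPV6_ADDR_SIZE bytes */
	uint8_t depth;
};

/* add or update a rule: the next hop lives in the hash value, no rule object */
static int
rule_add_nh(struct rte_hash *rules_tbl, const struct lpm6_rule_key *key,
		uint32_t next_hop)
{
	/* rte_hash_add_key_data() updates the data if the key already exists */
	return rte_hash_add_key_data(rules_tbl, key,
			(void *)(uintptr_t)next_hop);
}

/* exact-match lookup; returns 1 and fills *next_hop if the rule exists */
static int
rule_find_nh(struct rte_hash *rules_tbl, const struct lpm6_rule_key *key,
		uint32_t *next_hop)
{
	void *data;

	if (rte_hash_lookup_data(rules_tbl, key, &data) < 0)
		return 0;

	*next_hop = (uint32_t)(uintptr_t)data;
	return 1;
}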

> Wouldn't it make more sense to use something like tree, and use left/right
> in the rules entry. That way the memory is spread and scales with the number
> of rules.
Maybe. But there is no tree library in DPDK, so I chose a fast and simple way
to implement the rule storage using the existing hashtable lib.
And it gives good performance results.

Anyway, this is not the data plane; add/delete operations are not executed
very often, so it's not critical to find the most memory-efficient approach,
a good one is ok.

> Remember on a internet router, it is not unusual to 2M or more rules.

> Also. Please run checkpatch shell script on your patches.  For example, there
> should be blank line between declarations and code.
I have. It didn't give me any warnings.




-- 
Alex


* Re: [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions
       [not found] <20180711175356.035761B462@dpdk.org>
@ 2018-07-11 20:30 ` Stephen Hemminger
  2018-07-12  9:29   ` Alex Kiselev
                     ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stephen Hemminger @ 2018-07-11 20:30 UTC (permalink / raw)
  To: Alex Kiselev; +Cc: dev, Bruce Richardson

On Wed, 11 Jul 2018 20:53:46 +0300
Alex Kiselev <alex@therouter.net> wrote:

> librte_lpm: Improve lpm6 performance
> 
> Rework the lpm6 rule subsystem and replace
> current rules algorithm complexity O(n)
> with hashtables which allow dealing with
> large (50k) rule sets.
> 
> Signed-off-by: Alex Kiselev <alex@therouter.net>
> ---
>  lib/Makefile               |   2 +-
>  lib/librte_lpm/meson.build |   1 +
>  lib/librte_lpm/rte_lpm6.c  | 406 ++++++++++++++++++++++++++++-----------------
>  3 files changed, 255 insertions(+), 154 deletions(-)
> 
> diff --git a/lib/Makefile b/lib/Makefile
> index d82462ba2..79aeaa04f 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -47,7 +47,7 @@ DEPDIRS-librte_hash := librte_eal librte_ring
>  DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
>  DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
>  DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
> -DEPDIRS-librte_lpm := librte_eal
> +DEPDIRS-librte_lpm := librte_eal librte_mempool librte_hash
>  DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
>  DEPDIRS-librte_acl := librte_eal
>  DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member
> diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
> index 067849427..fcb5bed2c 100644
> --- a/lib/librte_lpm/meson.build
> +++ b/lib/librte_lpm/meson.build
> @@ -7,3 +7,4 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
>  # since header files have different names, we can install all vector headers
>  # without worrying about which architecture we actually need
>  headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
> +deps += ['mempool', 'hash']
> diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
> index 149677eb1..d40eeb043 100644
> --- a/lib/librte_lpm/rte_lpm6.c
> +++ b/lib/librte_lpm/rte_lpm6.c
> @@ -21,6 +21,10 @@
>  #include <rte_errno.h>
>  #include <rte_rwlock.h>
>  #include <rte_spinlock.h>
> +#include <rte_hash.h>
> +#include <rte_mempool.h>
> +#include <assert.h>
> +#include <rte_jhash.h>
>  
>  #include "rte_lpm6.h"
>  
> @@ -37,6 +41,8 @@
>  #define BYTE_SIZE                                 8
>  #define BYTES2_SIZE                              16
>  
> +#define RULE_HASH_TABLE_EXTRA_SPACE              64
> +
>  #define lpm6_tbl8_gindex next_hop
>  
>  /** Flags for setting an entry as valid/invalid. */
> @@ -70,6 +76,12 @@ struct rte_lpm6_rule {
>  	uint8_t depth; /**< Rule depth. */
>  };
>  
> +/** Rules tbl entry key. */
> +struct rte_lpm6_rule_key {
> +	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
> +	uint8_t depth; /**< Rule depth. */
> +};
> +
>  /** LPM6 structure. */
>  struct rte_lpm6 {
>  	/* LPM metadata. */
> @@ -80,7 +92,8 @@ struct rte_lpm6 {
>  	uint32_t next_tbl8;              /**< Next tbl8 to be used. */
>  
>  	/* LPM Tables. */
> -	struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */
> +	struct rte_mempool *rules_pool; /**< LPM rules mempool. */
> +	struct rte_hash *rules_tbl; /**< LPM rules. */
>  	struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
>  			__rte_cache_aligned; /**< LPM tbl24 table. */
>  	struct rte_lpm6_tbl_entry tbl8[0]
> @@ -93,22 +106,81 @@ struct rte_lpm6 {
>   * and set the rest to 0.


What is the increased memory overhead of having a hash table?

Wouldn't it make more sense to use something like a tree, and use left/right
pointers in the rules entry? That way the memory is spread and scales with the
number of rules.

Remember that on an internet router it is not unusual to have 2M or more rules.

Also, please run the checkpatch shell script on your patches. For example, there
should be a blank line between declarations and code.


Thread overview: 6+ messages
2018-07-11 17:53 [dpdk-dev] [PATCH v3 1/2] librte_lpm: Improve performance of the delete and add functions Alex Kiselev
     [not found] <20180711175356.035761B462@dpdk.org>
2018-07-11 20:30 ` Stephen Hemminger
2018-07-12  9:29   ` Alex Kiselev
2018-07-16  8:10   ` Alex Kiselev
2018-07-16 17:34   ` Alex Kiselev
2018-07-16 17:50     ` Stephen Hemminger
