DPDK patches and discussions
From: Suanming Mou <suanmingm@nvidia.com>
To: viacheslavo@nvidia.com, matan@nvidia.com
Cc: rasland@nvidia.com, dev@dpdk.org
Subject: [dpdk-dev] [PATCH 1/2] net/mlx5: optimize hash list synchronization
Date: Wed,  2 Dec 2020 19:39:24 +0800	[thread overview]
Message-ID: <1606909165-164407-2-git-send-email-suanmingm@nvidia.com> (raw)
In-Reply-To: <1606909165-164407-1-git-send-email-suanmingm@nvidia.com>

Since each hash table operation only touches one dedicated bucket,
the hash table lock and gen_cnt can be allocated per bucket.

Currently, the hash table uses one global lock to protect all the
buckets. That global lock prevents entries in different buckets from
being operated on concurrently, which hurts hash table performance.
In addition, because gen_cnt is updated for the whole hash table, a
change in any bucket forces a redundant re-search of unrelated
buckets.

This commit moves the lock and gen_cnt into each bucket so that
entries in different buckets can be operated on concurrently and
more efficiently.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
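Note (not part of the commit message): below is a minimal standalone
sketch of the per-bucket locking scheme this patch applies. It is a
simplified illustration, not the driver code: pthread rwlocks and C11
atomics stand in for rte_rwlock and the __atomic builtins, entries are
plain heap allocations, and only the "direct key" hashing mode is shown.

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <sys/queue.h>

  struct entry {
  	LIST_ENTRY(entry) next;
  	uint64_t key;
  };

  struct bucket {
  	LIST_HEAD(, entry) head; /* Entries hashed to this bucket. */
  	pthread_rwlock_t lock;   /* Per-bucket lock, not table-wide. */
  	atomic_uint gen_cnt;     /* Bumped on every bucket change. */
  };

  #define TABLE_SIZE 64U           /* Power of two, mask = size - 1. */
  static struct bucket table[TABLE_SIZE];

  static void table_init(void)
  {
  	/* Mirrors the per-bucket rte_rwlock_init() loop in the patch. */
  	for (uint32_t i = 0; i < TABLE_SIZE; i++) {
  		LIST_INIT(&table[i].head);
  		pthread_rwlock_init(&table[i].lock, NULL);
  		atomic_init(&table[i].gen_cnt, 0);
  	}
  }

  static uint32_t bucket_idx(uint64_t key)
  {
  	return (uint32_t)(key & (TABLE_SIZE - 1)); /* "direct key" mode. */
  }

  /* Read path: only the target bucket is read-locked. */
  static struct entry *lookup(uint64_t key)
  {
  	struct bucket *b = &table[bucket_idx(key)];
  	struct entry *e;

  	pthread_rwlock_rdlock(&b->lock);
  	LIST_FOREACH(e, &b->head, next)
  		if (e->key == key)
  			break;
  	pthread_rwlock_unlock(&b->lock);
  	return e;
  }

  /* Write path: optimistic lookup first, then re-check only this
   * bucket's gen_cnt under the write lock to decide whether a
   * re-search is really needed.
   */
  static struct entry *get_or_insert(uint64_t key)
  {
  	struct bucket *b = &table[bucket_idx(key)];
  	unsigned int prev_gen = atomic_load_explicit(&b->gen_cnt,
  						     memory_order_acquire);
  	struct entry *e = lookup(key);

  	if (e)
  		return e;
  	pthread_rwlock_wrlock(&b->lock);
  	/* Re-search only if this bucket changed since the lookup. */
  	if (atomic_load_explicit(&b->gen_cnt, memory_order_acquire) !=
  	    prev_gen) {
  		LIST_FOREACH(e, &b->head, next)
  			if (e->key == key)
  				goto done;
  	}
  	e = calloc(1, sizeof(*e));
  	if (e) {
  		e->key = key;
  		LIST_INSERT_HEAD(&b->head, e, next);
  		atomic_fetch_add_explicit(&b->gen_cnt, 1,
  					  memory_order_acq_rel);
  	}
  done:
  	pthread_rwlock_unlock(&b->lock);
  	return e;
  }

With a table-wide lock and gen_cnt, any insertion anywhere would
serialize all callers and invalidate every optimistic lookup; with the
per-bucket layout above, only callers hashing to the same bucket
contend or re-search.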
 drivers/net/mlx5/mlx5_utils.c | 76 +++++++++++++++++++++++++------------------
 drivers/net/mlx5/mlx5_utils.h | 12 +++++--
 2 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index 848d108..42e5d80 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -41,6 +41,7 @@ struct mlx5_hlist *
 	struct mlx5_hlist *h;
 	uint32_t act_size;
 	uint32_t alloc_size;
+	uint32_t i;
 
 	if (!size || (!cb_create ^ !cb_remove))
 		return NULL;
@@ -53,7 +54,7 @@ struct mlx5_hlist *
 		act_size = size;
 	}
 	alloc_size = sizeof(struct mlx5_hlist) +
-		     sizeof(struct mlx5_hlist_head) * act_size;
+		     sizeof(struct mlx5_hlist_bucket) * act_size;
 	/* Using zmalloc, then no need to initialize the heads. */
 	h = mlx5_malloc(MLX5_MEM_ZERO, alloc_size, RTE_CACHE_LINE_SIZE,
 			SOCKET_ID_ANY);
@@ -72,25 +73,22 @@ struct mlx5_hlist *
 	h->cb_create = cb_create ? cb_create : mlx5_hlist_default_create_cb;
 	h->cb_match = cb_match ? cb_match : mlx5_hlist_default_match_cb;
 	h->cb_remove = cb_remove ? cb_remove : mlx5_hlist_default_remove_cb;
-	rte_rwlock_init(&h->lock);
+	for (i = 0; i < act_size; i++)
+		rte_rwlock_init(&h->buckets[i].lock);
 	DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX32 " is created.",
 		h->name, act_size);
 	return h;
 }
 
 static struct mlx5_hlist_entry *
-__hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
+__hlist_lookup(struct mlx5_hlist *h, uint64_t key, uint32_t idx,
+	       void *ctx, bool reuse)
 {
-	uint32_t idx;
 	struct mlx5_hlist_head *first;
 	struct mlx5_hlist_entry *node;
 
 	MLX5_ASSERT(h);
-	if (h->direct_key)
-		idx = (uint32_t)(key & h->mask);
-	else
-		idx = rte_hash_crc_8byte(key, 0) & h->mask;
-	first = &h->heads[idx];
+	first = &h->buckets[idx].head;
 	LIST_FOREACH(node, first, next) {
 		if (!h->cb_match(h, node, key, ctx)) {
 			if (reuse) {
@@ -107,21 +105,28 @@ struct mlx5_hlist *
 }
 
 static struct mlx5_hlist_entry *
-hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
+hlist_lookup(struct mlx5_hlist *h, uint64_t key, uint32_t idx,
+	     void *ctx, bool reuse)
 {
 	struct mlx5_hlist_entry *node;
 
 	MLX5_ASSERT(h);
-	rte_rwlock_read_lock(&h->lock);
-	node = __hlist_lookup(h, key, ctx, reuse);
-	rte_rwlock_read_unlock(&h->lock);
+	rte_rwlock_read_lock(&h->buckets[idx].lock);
+	node = __hlist_lookup(h, key, idx, ctx, reuse);
+	rte_rwlock_read_unlock(&h->buckets[idx].lock);
 	return node;
 }
 
 struct mlx5_hlist_entry *
 mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx)
 {
-	return hlist_lookup(h, key, ctx, false);
+	uint32_t idx;
+
+	if (h->direct_key)
+		idx = (uint32_t)(key & h->mask);
+	else
+		idx = rte_hash_crc_8byte(key, 0) & h->mask;
+	return hlist_lookup(h, key, idx, ctx, false);
 }
 
 struct mlx5_hlist_entry*
@@ -129,30 +134,32 @@ struct mlx5_hlist_entry*
 {
 	uint32_t idx;
 	struct mlx5_hlist_head *first;
+	struct mlx5_hlist_bucket *b;
 	struct mlx5_hlist_entry *entry;
 	uint32_t prev_gen_cnt = 0;
 
+	if (h->direct_key)
+		idx = (uint32_t)(key & h->mask);
+	else
+		idx = rte_hash_crc_8byte(key, 0) & h->mask;
 	MLX5_ASSERT(h);
+	b = &h->buckets[idx];
 	/* Use write lock directly for write-most list. */
 	if (!h->write_most) {
-		prev_gen_cnt = __atomic_load_n(&h->gen_cnt, __ATOMIC_ACQUIRE);
-		entry = hlist_lookup(h, key, ctx, true);
+		prev_gen_cnt = __atomic_load_n(&b->gen_cnt, __ATOMIC_ACQUIRE);
+		entry = hlist_lookup(h, key, idx, ctx, true);
 		if (entry)
 			return entry;
 	}
-	rte_rwlock_write_lock(&h->lock);
+	rte_rwlock_write_lock(&b->lock);
 	/* Check if the list changed by other threads. */
 	if (h->write_most ||
-	    prev_gen_cnt != __atomic_load_n(&h->gen_cnt, __ATOMIC_ACQUIRE)) {
-		entry = __hlist_lookup(h, key, ctx, true);
+	    prev_gen_cnt != __atomic_load_n(&b->gen_cnt, __ATOMIC_ACQUIRE)) {
+		entry = __hlist_lookup(h, key, idx, ctx, true);
 		if (entry)
 			goto done;
 	}
-	if (h->direct_key)
-		idx = (uint32_t)(key & h->mask);
-	else
-		idx = rte_hash_crc_8byte(key, 0) & h->mask;
-	first = &h->heads[idx];
+	first = &b->head;
 	entry = h->cb_create(h, key, ctx);
 	if (!entry) {
 		rte_errno = ENOMEM;
@@ -162,30 +169,37 @@ struct mlx5_hlist_entry*
 	entry->key = key;
 	entry->ref_cnt = 1;
 	LIST_INSERT_HEAD(first, entry, next);
-	__atomic_add_fetch(&h->gen_cnt, 1, __ATOMIC_ACQ_REL);
+	__atomic_add_fetch(&b->gen_cnt, 1, __ATOMIC_ACQ_REL);
 	DRV_LOG(DEBUG, "Hash list %s entry %p new: %u.",
 		h->name, (void *)entry, entry->ref_cnt);
 done:
-	rte_rwlock_write_unlock(&h->lock);
+	rte_rwlock_write_unlock(&b->lock);
 	return entry;
 }
 
 int
 mlx5_hlist_unregister(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
 {
-	rte_rwlock_write_lock(&h->lock);
+	uint32_t idx;
+
+	if (h->direct_key)
+		idx = (uint32_t)(entry->key & h->mask);
+	else
+		idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
+
+	rte_rwlock_write_lock(&h->buckets[idx].lock);
 	MLX5_ASSERT(entry && entry->ref_cnt && entry->next.le_prev);
 	DRV_LOG(DEBUG, "Hash list %s entry %p deref: %u.",
 		h->name, (void *)entry, entry->ref_cnt);
 	if (--entry->ref_cnt) {
-		rte_rwlock_write_unlock(&h->lock);
+		rte_rwlock_write_unlock(&h->buckets[idx].lock);
 		return 1;
 	}
 	LIST_REMOVE(entry, next);
 	/* Set to NULL to get rid of removing action for more than once. */
 	entry->next.le_prev = NULL;
 	h->cb_remove(h, entry);
-	rte_rwlock_write_unlock(&h->lock);
+	rte_rwlock_write_unlock(&h->buckets[idx].lock);
 	DRV_LOG(DEBUG, "Hash list %s entry %p removed.",
 		h->name, (void *)entry);
 	return 0;
@@ -200,8 +214,8 @@ struct mlx5_hlist_entry*
 	MLX5_ASSERT(h);
 	for (idx = 0; idx < h->table_sz; ++idx) {
 		/* No LIST_FOREACH_SAFE, using while instead. */
-		while (!LIST_EMPTY(&h->heads[idx])) {
-			entry = LIST_FIRST(&h->heads[idx]);
+		while (!LIST_EMPTY(&h->buckets[idx].head)) {
+			entry = LIST_FIRST(&h->buckets[idx].head);
 			LIST_REMOVE(entry, next);
 			/*
 			 * The owner of whole element which contains data entry
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index be6e5f6..6f25768 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -328,6 +328,13 @@ typedef int (*mlx5_hlist_match_cb)(struct mlx5_hlist *list,
 				  (struct mlx5_hlist *list,
 				   uint64_t key, void *ctx);
 
+/* Hash list bucket head. */
+struct mlx5_hlist_bucket {
+	struct mlx5_hlist_head head; /* List head. */
+	rte_rwlock_t lock; /* Bucket lock. */
+	uint32_t gen_cnt; /* List modification will update generation count. */
+} __rte_cache_aligned;
+
 /**
  * Hash list table structure
  *
@@ -344,15 +351,14 @@ struct mlx5_hlist {
 	uint32_t entry_sz; /**< Size of entry, used to allocate entry. */
 	/**< mask to get the index of the list heads. */
 	uint32_t mask;
-	rte_rwlock_t lock;
-	uint32_t gen_cnt; /* List modification will update generation count. */
 	bool direct_key; /* Use the new entry key directly as hash index. */
 	bool write_most; /* List mostly used for append new or destroy. */
 	void *ctx;
 	mlx5_hlist_create_cb cb_create; /**< entry create callback. */
 	mlx5_hlist_match_cb cb_match; /**< entry match callback. */
 	mlx5_hlist_remove_cb cb_remove; /**< entry remove callback. */
-	struct mlx5_hlist_head heads[];	/**< list head arrays. */
+	struct mlx5_hlist_bucket buckets[] __rte_cache_aligned;
+	/**< list bucket arrays. */
 };
 
 /**
-- 
1.8.3.1



Thread overview: 7+ messages
2020-12-02 11:39 [dpdk-dev] [PATCH 0/2] net/mlx5: hash list optimization Suanming Mou
2020-12-02 11:39 ` Suanming Mou [this message]
2020-12-02 11:39 ` [dpdk-dev] [PATCH 2/2] net/mlx5: optimize the hash list entry memory Suanming Mou
2020-12-03  2:18 ` [dpdk-dev] [PATCH v2 0/2] net/mlx5: hash list optimization Suanming Mou
2020-12-03  2:18   ` [dpdk-dev] [PATCH v2 1/2] net/mlx5: optimize hash list synchronization Suanming Mou
2020-12-03  2:18   ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: optimize the hash list entry memory Suanming Mou
2020-12-08 23:40   ` [dpdk-dev] [PATCH v2 0/2] net/mlx5: hash list optimization Raslan Darawsheh
