DPDK patches and discussions
 help / color / mirror / Atom feed
From: Suanming Mou <suanmingm@nvidia.com>
To: viacheslavo@nvidia.com, matan@nvidia.com
Cc: rasland@nvidia.com, dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 1/2] net/mlx5: optimize hash list synchronization
Date: Thu,  3 Dec 2020 04:18:51 +0200
Message-ID: <20201203021852.1204285-2-suanmingm@nvidia.com> (raw)
In-Reply-To: <20201203021852.1204285-1-suanmingm@nvidia.com>

Since all the hash table operations are related with one dedicated
bucket, the hash table lock and gen_cnt can be allocated per-bucket.

Currently, the hash table uses one global lock to protect all the
buckets, that global lock avoids the buckets to be operated at one
time, it hurts the hash table performance. And the gen_cnt updated
by the entire hash table causes incorrect redundant list research.

This commit optimized the lock and gen_cnt to bucket solid allows
different bucket entries can be operated more efficiently.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5_utils.c | 76 +++++++++++++++++++++--------------
 drivers/net/mlx5/mlx5_utils.h | 12 ++++--
 2 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index 848d108bc6..42e5d80aee 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -41,6 +41,7 @@ mlx5_hlist_create(const char *name, uint32_t size, uint32_t entry_size,
 	struct mlx5_hlist *h;
 	uint32_t act_size;
 	uint32_t alloc_size;
+	uint32_t i;
 
 	if (!size || (!cb_create ^ !cb_remove))
 		return NULL;
@@ -53,7 +54,7 @@ mlx5_hlist_create(const char *name, uint32_t size, uint32_t entry_size,
 		act_size = size;
 	}
 	alloc_size = sizeof(struct mlx5_hlist) +
-		     sizeof(struct mlx5_hlist_head) * act_size;
+		     sizeof(struct mlx5_hlist_bucket) * act_size;
 	/* Using zmalloc, then no need to initialize the heads. */
 	h = mlx5_malloc(MLX5_MEM_ZERO, alloc_size, RTE_CACHE_LINE_SIZE,
 			SOCKET_ID_ANY);
@@ -72,25 +73,22 @@ mlx5_hlist_create(const char *name, uint32_t size, uint32_t entry_size,
 	h->cb_create = cb_create ? cb_create : mlx5_hlist_default_create_cb;
 	h->cb_match = cb_match ? cb_match : mlx5_hlist_default_match_cb;
 	h->cb_remove = cb_remove ? cb_remove : mlx5_hlist_default_remove_cb;
-	rte_rwlock_init(&h->lock);
+	for (i = 0; i < act_size; i++)
+		rte_rwlock_init(&h->buckets[i].lock);
 	DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX32 " is created.",
 		h->name, act_size);
 	return h;
 }
 
 static struct mlx5_hlist_entry *
-__hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
+__hlist_lookup(struct mlx5_hlist *h, uint64_t key, uint32_t idx,
+	       void *ctx, bool reuse)
 {
-	uint32_t idx;
 	struct mlx5_hlist_head *first;
 	struct mlx5_hlist_entry *node;
 
 	MLX5_ASSERT(h);
-	if (h->direct_key)
-		idx = (uint32_t)(key & h->mask);
-	else
-		idx = rte_hash_crc_8byte(key, 0) & h->mask;
-	first = &h->heads[idx];
+	first = &h->buckets[idx].head;
 	LIST_FOREACH(node, first, next) {
 		if (!h->cb_match(h, node, key, ctx)) {
 			if (reuse) {
@@ -107,21 +105,28 @@ __hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
 }
 
 static struct mlx5_hlist_entry *
-hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
+hlist_lookup(struct mlx5_hlist *h, uint64_t key, uint32_t idx,
+	     void *ctx, bool reuse)
 {
 	struct mlx5_hlist_entry *node;
 
 	MLX5_ASSERT(h);
-	rte_rwlock_read_lock(&h->lock);
-	node = __hlist_lookup(h, key, ctx, reuse);
-	rte_rwlock_read_unlock(&h->lock);
+	rte_rwlock_read_lock(&h->buckets[idx].lock);
+	node = __hlist_lookup(h, key, idx, ctx, reuse);
+	rte_rwlock_read_unlock(&h->buckets[idx].lock);
 	return node;
 }
 
 struct mlx5_hlist_entry *
 mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx)
 {
-	return hlist_lookup(h, key, ctx, false);
+	uint32_t idx;
+
+	if (h->direct_key)
+		idx = (uint32_t)(key & h->mask);
+	else
+		idx = rte_hash_crc_8byte(key, 0) & h->mask;
+	return hlist_lookup(h, key, idx, ctx, false);
 }
 
 struct mlx5_hlist_entry*
@@ -129,30 +134,32 @@ mlx5_hlist_register(struct mlx5_hlist *h, uint64_t key, void *ctx)
 {
 	uint32_t idx;
 	struct mlx5_hlist_head *first;
+	struct mlx5_hlist_bucket *b;
 	struct mlx5_hlist_entry *entry;
 	uint32_t prev_gen_cnt = 0;
 
+	if (h->direct_key)
+		idx = (uint32_t)(key & h->mask);
+	else
+		idx = rte_hash_crc_8byte(key, 0) & h->mask;
 	MLX5_ASSERT(h);
+	b = &h->buckets[idx];
 	/* Use write lock directly for write-most list. */
 	if (!h->write_most) {
-		prev_gen_cnt = __atomic_load_n(&h->gen_cnt, __ATOMIC_ACQUIRE);
-		entry = hlist_lookup(h, key, ctx, true);
+		prev_gen_cnt = __atomic_load_n(&b->gen_cnt, __ATOMIC_ACQUIRE);
+		entry = hlist_lookup(h, key, idx, ctx, true);
 		if (entry)
 			return entry;
 	}
-	rte_rwlock_write_lock(&h->lock);
+	rte_rwlock_write_lock(&b->lock);
 	/* Check if the list changed by other threads. */
 	if (h->write_most ||
-	    prev_gen_cnt != __atomic_load_n(&h->gen_cnt, __ATOMIC_ACQUIRE)) {
-		entry = __hlist_lookup(h, key, ctx, true);
+	    prev_gen_cnt != __atomic_load_n(&b->gen_cnt, __ATOMIC_ACQUIRE)) {
+		entry = __hlist_lookup(h, key, idx, ctx, true);
 		if (entry)
 			goto done;
 	}
-	if (h->direct_key)
-		idx = (uint32_t)(key & h->mask);
-	else
-		idx = rte_hash_crc_8byte(key, 0) & h->mask;
-	first = &h->heads[idx];
+	first = &b->head;
 	entry = h->cb_create(h, key, ctx);
 	if (!entry) {
 		rte_errno = ENOMEM;
@@ -162,30 +169,37 @@ mlx5_hlist_register(struct mlx5_hlist *h, uint64_t key, void *ctx)
 	entry->key = key;
 	entry->ref_cnt = 1;
 	LIST_INSERT_HEAD(first, entry, next);
-	__atomic_add_fetch(&h->gen_cnt, 1, __ATOMIC_ACQ_REL);
+	__atomic_add_fetch(&b->gen_cnt, 1, __ATOMIC_ACQ_REL);
 	DRV_LOG(DEBUG, "Hash list %s entry %p new: %u.",
 		h->name, (void *)entry, entry->ref_cnt);
 done:
-	rte_rwlock_write_unlock(&h->lock);
+	rte_rwlock_write_unlock(&b->lock);
 	return entry;
 }
 
 int
 mlx5_hlist_unregister(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
 {
-	rte_rwlock_write_lock(&h->lock);
+	uint32_t idx;
+
+	if (h->direct_key)
+		idx = (uint32_t)(entry->key & h->mask);
+	else
+		idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
+
+	rte_rwlock_write_lock(&h->buckets[idx].lock);
 	MLX5_ASSERT(entry && entry->ref_cnt && entry->next.le_prev);
 	DRV_LOG(DEBUG, "Hash list %s entry %p deref: %u.",
 		h->name, (void *)entry, entry->ref_cnt);
 	if (--entry->ref_cnt) {
-		rte_rwlock_write_unlock(&h->lock);
+		rte_rwlock_write_unlock(&h->buckets[idx].lock);
 		return 1;
 	}
 	LIST_REMOVE(entry, next);
 	/* Set to NULL to get rid of removing action for more than once. */
 	entry->next.le_prev = NULL;
 	h->cb_remove(h, entry);
-	rte_rwlock_write_unlock(&h->lock);
+	rte_rwlock_write_unlock(&h->buckets[idx].lock);
 	DRV_LOG(DEBUG, "Hash list %s entry %p removed.",
 		h->name, (void *)entry);
 	return 0;
@@ -200,8 +214,8 @@ mlx5_hlist_destroy(struct mlx5_hlist *h)
 	MLX5_ASSERT(h);
 	for (idx = 0; idx < h->table_sz; ++idx) {
 		/* No LIST_FOREACH_SAFE, using while instead. */
-		while (!LIST_EMPTY(&h->heads[idx])) {
-			entry = LIST_FIRST(&h->heads[idx]);
+		while (!LIST_EMPTY(&h->buckets[idx].head)) {
+			entry = LIST_FIRST(&h->buckets[idx].head);
 			LIST_REMOVE(entry, next);
 			/*
 			 * The owner of whole element which contains data entry
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index be6e5f67aa..6f2576812b 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -328,6 +328,13 @@ typedef struct mlx5_hlist_entry *(*mlx5_hlist_create_cb)
 				  (struct mlx5_hlist *list,
 				   uint64_t key, void *ctx);
 
+/* Hash list bucket head. */
+struct mlx5_hlist_bucket {
+	struct mlx5_hlist_head head; /* List head. */
+	rte_rwlock_t lock; /* Bucket lock. */
+	uint32_t gen_cnt; /* List modification will update generation count. */
+} __rte_cache_aligned;
+
 /**
  * Hash list table structure
  *
@@ -344,15 +351,14 @@ struct mlx5_hlist {
 	uint32_t entry_sz; /**< Size of entry, used to allocate entry. */
 	/**< mask to get the index of the list heads. */
 	uint32_t mask;
-	rte_rwlock_t lock;
-	uint32_t gen_cnt; /* List modification will update generation count. */
 	bool direct_key; /* Use the new entry key directly as hash index. */
 	bool write_most; /* List mostly used for append new or destroy. */
 	void *ctx;
 	mlx5_hlist_create_cb cb_create; /**< entry create callback. */
 	mlx5_hlist_match_cb cb_match; /**< entry match callback. */
 	mlx5_hlist_remove_cb cb_remove; /**< entry remove callback. */
-	struct mlx5_hlist_head heads[];	/**< list head arrays. */
+	struct mlx5_hlist_bucket buckets[] __rte_cache_aligned;
+	/**< list bucket arrays. */
 };
 
 /**
-- 
2.25.1


  reply	other threads:[~2020-12-03  2:19 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-02 11:39 [dpdk-dev] [PATCH 0/2] net/mlx5: hash list optimization Suanming Mou
2020-12-02 11:39 ` [dpdk-dev] [PATCH 1/2] net/mlx5: optimize hash list synchronization Suanming Mou
2020-12-02 11:39 ` [dpdk-dev] [PATCH 2/2] net/mlx5: optimize the hash list entry memory Suanming Mou
2020-12-03  2:18 ` [dpdk-dev] [PATCH v2 0/2] net/mlx5: hash list optimization Suanming Mou
2020-12-03  2:18   ` Suanming Mou [this message]
2020-12-03  2:18   ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: optimize the hash list entry memory Suanming Mou
2020-12-08 23:40   ` [dpdk-dev] [PATCH v2 0/2] net/mlx5: hash list optimization Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201203021852.1204285-2-suanmingm@nvidia.com \
    --to=suanmingm@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=rasland@nvidia.com \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git