DPDK patches and discussions
 help / color / mirror / Atom feed
From: David Marchand <david.marchand@redhat.com>
To: dev@dpdk.org
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>,
	Chenbo Xia <chenbo.xia@intel.com>
Subject: [PATCH 2/2] vhost: stop using mempool for IOTLB
Date: Fri, 22 Jul 2022 15:53:20 +0200	[thread overview]
Message-ID: <20220722135320.109269-2-david.marchand@redhat.com> (raw)
In-Reply-To: <20220722135320.109269-1-david.marchand@redhat.com>

A mempool consumes 3 memzones (with the default ring mempool driver).
Default DPDK configuration allows RTE_MAX_MEMZONE (2560) memzones.

Assuming there are no other memzones (well, there are always other
memzones in a DPDK application), that means that we can have a maximum
of 853 mempools.

The IOTLB so far was requesting a mempool per vq, which means that at
the maximum, the vhost library can request mempools for 426 qps.

This limit was recently reached on big systems with a lot of virtio
ports (and multiqueue in use).

While the limit on mempool count could be something we need to look at
at the DPDK project level, there is no reason to use mempools with the
IOTLB code.

The IOTLB cache entries do not need to be DMA-able and are only used by
the current process (in multiprocess context).
Getting/putting objects from/in the mempool is always associated with
some other locks, so some level of contention is already present.

We can convert to a malloc'd pool with objects put in a free list
protected by a spinlock.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/vhost/iotlb.c | 102 ++++++++++++++++++++++++++++------------------
 lib/vhost/iotlb.h |   1 +
 lib/vhost/vhost.c |   2 +-
 lib/vhost/vhost.h |   4 +-
 4 files changed, 67 insertions(+), 42 deletions(-)

diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c
index dd35338ec0..2a78929e78 100644
--- a/lib/vhost/iotlb.c
+++ b/lib/vhost/iotlb.c
@@ -13,6 +13,7 @@
 
 struct vhost_iotlb_entry {
 	TAILQ_ENTRY(vhost_iotlb_entry) next;
+	SLIST_ENTRY(vhost_iotlb_entry) next_free;
 
 	uint64_t iova;
 	uint64_t uaddr;
@@ -22,6 +23,28 @@ struct vhost_iotlb_entry {
 
 #define IOTLB_CACHE_SIZE 2048
 
+static struct vhost_iotlb_entry *
+vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
+{
+	struct vhost_iotlb_entry *node;
+
+	rte_spinlock_lock(&vq->iotlb_free_lock);
+	node = SLIST_FIRST(&vq->iotlb_free_list);
+	if (node != NULL)
+		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
+	rte_spinlock_unlock(&vq->iotlb_free_lock);
+	return node;
+}
+
+static void
+vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
+	struct vhost_iotlb_entry *node)
+{
+	rte_spinlock_lock(&vq->iotlb_free_lock);
+	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
+	rte_spinlock_unlock(&vq->iotlb_free_lock);
+}
+
 static void
 vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);
 
@@ -34,7 +57,7 @@ vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
 
 	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
 		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
-		rte_mempool_put(vq->iotlb_pool, node);
+		vhost_user_iotlb_pool_put(vq, node);
 	}
 
 	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
@@ -66,22 +89,21 @@ vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *
 				uint64_t iova, uint8_t perm)
 {
 	struct vhost_iotlb_entry *node;
-	int ret;
 
-	ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
-	if (ret) {
+	node = vhost_user_iotlb_pool_get(vq);
+	if (node == NULL) {
 		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
-			"IOTLB pool %s empty, clear entries for pending insertion\n",
-			vq->iotlb_pool->name);
+			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
+			vq->index);
 		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
 			vhost_user_iotlb_pending_remove_all(vq);
 		else
 			vhost_user_iotlb_cache_random_evict(vq);
-		ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
-		if (ret) {
+		node = vhost_user_iotlb_pool_get(vq);
+		if (node == NULL) {
 			VHOST_LOG_CONFIG(dev->ifname, ERR,
-				"IOTLB pool %s still empty, pending insertion failure\n",
-				vq->iotlb_pool->name);
+				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
+				vq->index);
 			return;
 		}
 	}
@@ -113,7 +135,7 @@ vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
 		if ((node->perm & perm) != node->perm)
 			continue;
 		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
-		rte_mempool_put(vq->iotlb_pool, node);
+		vhost_user_iotlb_pool_put(vq, node);
 	}
 
 	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
@@ -128,7 +150,7 @@ vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
 
 	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
 		TAILQ_REMOVE(&vq->iotlb_list, node, next);
-		rte_mempool_put(vq->iotlb_pool, node);
+		vhost_user_iotlb_pool_put(vq, node);
 	}
 
 	vq->iotlb_cache_nr = 0;
@@ -149,7 +171,7 @@ vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
 	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
 		if (!entry_idx) {
 			TAILQ_REMOVE(&vq->iotlb_list, node, next);
-			rte_mempool_put(vq->iotlb_pool, node);
+			vhost_user_iotlb_pool_put(vq, node);
 			vq->iotlb_cache_nr--;
 			break;
 		}
@@ -165,22 +187,21 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq
 				uint64_t size, uint8_t perm)
 {
 	struct vhost_iotlb_entry *node, *new_node;
-	int ret;
 
-	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
-	if (ret) {
+	new_node = vhost_user_iotlb_pool_get(vq);
+	if (new_node == NULL) {
 		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
-			"IOTLB pool %s empty, clear entries for cache insertion\n",
-			vq->iotlb_pool->name);
+			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
+			vq->index);
 		if (!TAILQ_EMPTY(&vq->iotlb_list))
 			vhost_user_iotlb_cache_random_evict(vq);
 		else
 			vhost_user_iotlb_pending_remove_all(vq);
-		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
-		if (ret) {
+		new_node = vhost_user_iotlb_pool_get(vq);
+		if (new_node == NULL) {
 			VHOST_LOG_CONFIG(dev->ifname, ERR,
-				"IOTLB pool %s still empty, cache insertion failed\n",
-				vq->iotlb_pool->name);
+				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
+				vq->index);
 			return;
 		}
 	}
@@ -198,7 +219,7 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq
 		 * So if iova already in list, assume identical.
 		 */
 		if (node->iova == new_node->iova) {
-			rte_mempool_put(vq->iotlb_pool, new_node);
+			vhost_user_iotlb_pool_put(vq, new_node);
 			goto unlock;
 		} else if (node->iova > new_node->iova) {
 			TAILQ_INSERT_BEFORE(node, new_node, next);
@@ -235,7 +256,7 @@ vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
 
 		if (iova < node->iova + node->size) {
 			TAILQ_REMOVE(&vq->iotlb_list, node, next);
-			rte_mempool_put(vq->iotlb_pool, node);
+			vhost_user_iotlb_pool_put(vq, node);
 			vq->iotlb_cache_nr--;
 		}
 	}
@@ -295,7 +316,7 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
 int
 vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
-	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	unsigned int i;
 	int socket = 0;
 
 	if (vq->iotlb_pool) {
@@ -304,6 +325,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
 		 * just drop all cached and pending entries.
 		 */
 		vhost_user_iotlb_flush_all(vq);
+		rte_free(vq->iotlb_pool);
 	}
 
 #ifdef RTE_LIBRTE_VHOST_NUMA
@@ -311,32 +333,32 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
 		socket = 0;
 #endif
 
+	rte_spinlock_init(&vq->iotlb_free_lock);
 	rte_rwlock_init(&vq->iotlb_lock);
 	rte_rwlock_init(&vq->iotlb_pending_lock);
 
+	SLIST_INIT(&vq->iotlb_free_list);
 	TAILQ_INIT(&vq->iotlb_list);
 	TAILQ_INIT(&vq->iotlb_pending_list);
 
-	snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d",
-			getpid(), dev->vid, vq->index);
-	VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name);
-
-	/* If already created, free it and recreate */
-	vq->iotlb_pool = rte_mempool_lookup(pool_name);
-	rte_mempool_free(vq->iotlb_pool);
-
-	vq->iotlb_pool = rte_mempool_create(pool_name,
-			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
-			0, 0, NULL, NULL, NULL, socket,
-			RTE_MEMPOOL_F_NO_CACHE_ALIGN |
-			RTE_MEMPOOL_F_SP_PUT);
+	vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
+		sizeof(struct vhost_iotlb_entry), 0, socket);
 	if (!vq->iotlb_pool) {
-		VHOST_LOG_CONFIG(dev->ifname, ERR, "Failed to create IOTLB cache pool %s\n",
-			pool_name);
+		VHOST_LOG_CONFIG(dev->ifname, ERR,
+			"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
+			vq->index);
 		return -1;
 	}
+	for (i = 0; i < IOTLB_CACHE_SIZE; i++)
+		vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
 
 	vq->iotlb_cache_nr = 0;
 
 	return 0;
 }
+
+void
+vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
+{
+	rte_free(vq->iotlb_pool);
+}
diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h
index 738e31e7b9..e27ebebcf5 100644
--- a/lib/vhost/iotlb.h
+++ b/lib/vhost/iotlb.h
@@ -48,5 +48,6 @@ void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
 						uint64_t size, uint8_t perm);
 void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
 int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq);
+void vhost_user_iotlb_destroy(struct vhost_virtqueue *vq);
 
 #endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 97bae0de91..0dfebace58 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -394,7 +394,7 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
 	vhost_free_async_mem(vq);
 	rte_free(vq->batch_copy_elems);
-	rte_mempool_free(vq->iotlb_pool);
+	vhost_user_iotlb_destroy(vq);
 	rte_free(vq->log_cache);
 	rte_free(vq);
 }
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index c6260b54cc..782d916ae0 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -299,10 +299,12 @@ struct vhost_virtqueue {
 
 	rte_rwlock_t	iotlb_lock;
 	rte_rwlock_t	iotlb_pending_lock;
-	struct rte_mempool *iotlb_pool;
+	struct vhost_iotlb_entry *iotlb_pool;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
 	int				iotlb_cache_nr;
+	rte_spinlock_t	iotlb_free_lock;
+	SLIST_HEAD(, vhost_iotlb_entry) iotlb_free_list;
 
 	/* Used to notify the guest (trigger interrupt) */
 	int			callfd;
-- 
2.36.1


  reply	other threads:[~2022-07-22 13:53 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand
2022-07-22 13:53 ` David Marchand [this message]
2022-07-22 14:00 ` David Marchand
2022-07-22 15:13 ` David Marchand
2022-07-25  7:11 ` [PATCH v2 " David Marchand
2022-07-25  7:11   ` [PATCH v2 2/2] vhost: stop using mempool for IOTLB cache David Marchand
2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand
2022-07-25 20:32   ` [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation David Marchand
2022-07-26  7:55     ` Maxime Coquelin
2022-09-13 15:02       ` Maxime Coquelin
2022-09-14  1:05         ` Xia, Chenbo
2022-09-14  7:14           ` Maxime Coquelin
2022-09-14  9:15             ` Thomas Monjalon
2022-09-14  9:34               ` Maxime Coquelin
2022-09-14  9:45                 ` Thomas Monjalon
2022-09-14 11:48                   ` Maxime Coquelin
2022-07-25 20:32   ` [PATCH v3 2/4] vhost: make NUMA reallocation code more robust David Marchand
2022-07-26  8:39     ` Maxime Coquelin
2022-07-25 20:32   ` [PATCH v3 3/4] vhost: keep a reference to virtqueue index David Marchand
2022-07-26  8:52     ` Maxime Coquelin
2022-07-26 10:00       ` David Marchand
2022-07-25 20:32   ` [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache David Marchand
2022-07-26  9:26     ` Maxime Coquelin
2023-02-17  7:42       ` Yuan, DukaiX
2022-09-15 16:02   ` [PATCH v3 0/4] vHost IOTLB cache rework Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220722135320.109269-2-david.marchand@redhat.com \
    --to=david.marchand@redhat.com \
    --cc=chenbo.xia@intel.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).