DPDK patches and discussions
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, david.marchand@redhat.com, chenbo.xia@intel.com
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH 5/6] vhost: allocate all data on same node as virtqueue
Date: Tue, 15 Jun 2021 10:33:07 +0200
Message-ID: <20210615083308.137401-6-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20210615083308.137401-1-maxime.coquelin@redhat.com>

This patch saves the NUMA node the virtqueue is allocated
on at init time, in order to allocate all other data on the
same node.

Most of the data is allocated before numa_realloc() is
called, and so will be reallocated properly. However, some
data, such as the log cache, is most likely allocated after.

The virtio device metadata is allocated on the same node
as VQ 0.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vhost.c      | 34 ++++++++++++++++------------------
 lib/vhost/vhost.h      |  1 +
 lib/vhost/vhost_user.c | 40 +++++++++++++++++++++++++++-------------
 3 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index c96f6335c8..0000cd3297 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -261,7 +261,7 @@ vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	uint64_t src, dst;
 	uint64_t len, remain = desc_len;
 
-	idesc = rte_malloc(__func__, desc_len, 0);
+	idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
 	if (unlikely(!idesc))
 		return NULL;
 
@@ -549,6 +549,7 @@ static void
 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
 	struct vhost_virtqueue *vq;
+	int numa_node = SOCKET_ID_ANY;
 
 	if (vring_idx >= VHOST_MAX_VRING) {
 		VHOST_LOG_CONFIG(ERR,
@@ -570,6 +571,15 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
 	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
 
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
+		VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
+			dev->vid, rte_strerror(errno));
+		numa_node = SOCKET_ID_ANY;
+	}
+#endif
+	vq->numa_node = numa_node;
+
 	vhost_user_iotlb_init(dev, vring_idx);
 }
 
@@ -1616,7 +1626,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 	struct vhost_virtqueue *vq;
 	struct virtio_net *dev = get_device(vid);
 	struct rte_vhost_async_features f;
-	int node;
 
 	if (dev == NULL || ops == NULL)
 		return -1;
@@ -1651,20 +1660,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 		goto reg_out;
 	}
 
-#ifdef RTE_LIBRTE_VHOST_NUMA
-	if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
-		VHOST_LOG_CONFIG(ERR,
-			"unable to get numa information in async register. "
-			"allocating async buffer memory on the caller thread node\n");
-		node = SOCKET_ID_ANY;
-	}
-#else
-	node = SOCKET_ID_ANY;
-#endif
-
 	vq->async_pkts_info = rte_malloc_socket(NULL,
 			vq->size * sizeof(struct async_inflight_info),
-			RTE_CACHE_LINE_SIZE, node);
+			RTE_CACHE_LINE_SIZE, vq->numa_node);
 	if (!vq->async_pkts_info) {
 		vhost_free_async_mem(vq);
 		VHOST_LOG_CONFIG(ERR,
@@ -1675,7 +1673,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 
 	vq->it_pool = rte_malloc_socket(NULL,
 			VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
-			RTE_CACHE_LINE_SIZE, node);
+			RTE_CACHE_LINE_SIZE, vq->numa_node);
 	if (!vq->it_pool) {
 		vhost_free_async_mem(vq);
 		VHOST_LOG_CONFIG(ERR,
@@ -1686,7 +1684,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 
 	vq->vec_pool = rte_malloc_socket(NULL,
 			VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
-			RTE_CACHE_LINE_SIZE, node);
+			RTE_CACHE_LINE_SIZE, vq->numa_node);
 	if (!vq->vec_pool) {
 		vhost_free_async_mem(vq);
 		VHOST_LOG_CONFIG(ERR,
@@ -1698,7 +1696,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 	if (vq_is_packed(dev)) {
 		vq->async_buffers_packed = rte_malloc_socket(NULL,
 			vq->size * sizeof(struct vring_used_elem_packed),
-			RTE_CACHE_LINE_SIZE, node);
+			RTE_CACHE_LINE_SIZE, vq->numa_node);
 		if (!vq->async_buffers_packed) {
 			vhost_free_async_mem(vq);
 			VHOST_LOG_CONFIG(ERR,
@@ -1709,7 +1707,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 	} else {
 		vq->async_descs_split = rte_malloc_socket(NULL,
 			vq->size * sizeof(struct vring_used_elem),
-			RTE_CACHE_LINE_SIZE, node);
+			RTE_CACHE_LINE_SIZE, vq->numa_node);
 		if (!vq->async_descs_split) {
 			vhost_free_async_mem(vq);
 			VHOST_LOG_CONFIG(ERR,
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 8078ddff79..8ffe387556 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -164,6 +164,7 @@ struct vhost_virtqueue {
 
 	uint16_t		batch_copy_nb_elems;
 	struct batch_copy_elem	*batch_copy_elems;
+	int			numa_node;
 	bool			used_wrap_counter;
 	bool			avail_wrap_counter;
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index b298312db6..72879a36c8 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -433,10 +433,10 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
 	if (vq_is_packed(dev)) {
 		if (vq->shadow_used_packed)
 			rte_free(vq->shadow_used_packed);
-		vq->shadow_used_packed = rte_malloc(NULL,
+		vq->shadow_used_packed = rte_malloc_socket(NULL,
 				vq->size *
 				sizeof(struct vring_used_elem_packed),
-				RTE_CACHE_LINE_SIZE);
+				RTE_CACHE_LINE_SIZE, vq->numa_node);
 		if (!vq->shadow_used_packed) {
 			VHOST_LOG_CONFIG(ERR,
 					"failed to allocate memory for shadow used ring.\n");
@@ -447,9 +447,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
 		if (vq->shadow_used_split)
 			rte_free(vq->shadow_used_split);
 
-		vq->shadow_used_split = rte_malloc(NULL,
+		vq->shadow_used_split = rte_malloc_socket(NULL,
 				vq->size * sizeof(struct vring_used_elem),
-				RTE_CACHE_LINE_SIZE);
+				RTE_CACHE_LINE_SIZE, vq->numa_node);
 
 		if (!vq->shadow_used_split) {
 			VHOST_LOG_CONFIG(ERR,
@@ -460,9 +460,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
 
 	if (vq->batch_copy_elems)
 		rte_free(vq->batch_copy_elems);
-	vq->batch_copy_elems = rte_malloc(NULL,
+	vq->batch_copy_elems = rte_malloc_socket(NULL,
 				vq->size * sizeof(struct batch_copy_elem),
-				RTE_CACHE_LINE_SIZE);
+				RTE_CACHE_LINE_SIZE, vq->numa_node);
 	if (!vq->batch_copy_elems) {
 		VHOST_LOG_CONFIG(ERR,
 			"failed to allocate memory for batching copy.\n");
@@ -501,6 +501,9 @@ numa_realloc(struct virtio_net *dev, int index)
 		return dev;
 	}
 
+	if (node == vq->numa_node)
+		goto out_dev_realloc;
+
 	vq = rte_realloc_socket(vq, sizeof(*vq), 0, node);
 	if (!vq) {
 		VHOST_LOG_CONFIG(ERR, "Failed to realloc virtqueue %d on node %d\n",
@@ -556,6 +559,9 @@ numa_realloc(struct virtio_net *dev, int index)
 		vq->log_cache = lc;
 	}
 
+	vq->numa_node = node;
+
+out_dev_realloc:
 	dev = rte_realloc_socket(old_dev, sizeof(*dev), 0, node);
 	if (!dev) {
 		VHOST_LOG_CONFIG(ERR, "Failed to realloc dev on node %d\n", node);
@@ -1199,7 +1205,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 	struct virtio_net *dev = *pdev;
 	struct VhostUserMemory *memory = &msg->payload.memory;
 	struct rte_vhost_mem_region *reg;
-
+	int numa_node = SOCKET_ID_ANY;
 	uint64_t mmap_offset;
 	uint32_t i;
 
@@ -1239,13 +1245,21 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		for (i = 0; i < dev->nr_vring; i++)
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
+	/*
+	 * If VQ 0 has already been allocated, try to allocate on the same
+	 * NUMA node. It can be reallocated later in numa_realloc().
+	 */
+	if (dev->nr_vring > 0)
+		numa_node = dev->virtqueue[0]->numa_node;
+
 	dev->nr_guest_pages = 0;
 	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = rte_zmalloc(NULL,
+		dev->guest_pages = rte_zmalloc_socket(NULL,
 					dev->max_guest_pages *
 					sizeof(struct guest_page),
-					RTE_CACHE_LINE_SIZE);
+					RTE_CACHE_LINE_SIZE,
+					numa_node);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
@@ -1255,8 +1269,8 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		}
 	}
 
-	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
-		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
+	dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct rte_vhost_memory) +
+		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0, numa_node);
 	if (dev->mem == NULL) {
 		VHOST_LOG_CONFIG(ERR,
 			"(%d) failed to allocate memory for dev->mem\n",
@@ -2179,9 +2193,9 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		rte_free(vq->log_cache);
 		vq->log_cache = NULL;
 		vq->log_cache_nb_elem = 0;
-		vq->log_cache = rte_zmalloc("vq log cache",
+		vq->log_cache = rte_malloc_socket("vq log cache",
 				sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
-				0);
+				0, vq->numa_node);
 		/*
 		 * If log cache alloc fail, don't fail migration, but no
 		 * caching will be done, which will impact performance
-- 
2.31.1


  parent reply	other threads:[~2021-06-15  8:33 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-15  8:33 [dpdk-dev] [PATCH 0/6] vhost: Fix and improve NUMA reallocation Maxime Coquelin
2021-06-15  8:33 ` [dpdk-dev] [PATCH 1/6] vhost: fix missing memory table NUMA realloc Maxime Coquelin
2021-06-15  8:33 ` [dpdk-dev] [PATCH 2/6] vhost: fix missing guest pages " Maxime Coquelin
2021-06-15  8:33 ` [dpdk-dev] [PATCH 3/6] vhost: fix missing cache logging " Maxime Coquelin
2021-06-15  8:33 ` [dpdk-dev] [PATCH 4/6] vhost: improve NUMA reallocation Maxime Coquelin
2021-06-15  8:33 ` Maxime Coquelin [this message]
2021-06-15  8:33 ` [dpdk-dev] [PATCH 6/6] vhost: convert inflight data to DPDK allocation API Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210615083308.137401-6-maxime.coquelin@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=chenbo.xia@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git