DPDK patches and discussions
From: Yi Yang <yi.y.yang@intel.com>
To: yliu@fridaylinux.org
Cc: dev@dpdk.org, Yi Yang <yi.y.yang@intel.com>
Subject: [dpdk-dev] [PATCH] vhost: fix vhost_user_set_mem_table error
Date: Wed, 20 Sep 2017 16:32:03 +0800
Message-ID: <1505896323-125943-1-git-send-email-yi.y.yang@intel.com>

Usually the vhost-user message VHOST_USER_SET_MEM_TABLE is sent only
once, during initialization. With memory hotplug and hot-unplug,
however, it is resent every time the guest memory layout changes,
carrying both the old memory regions (those not hot-unplugged) and
the new ones (those just hotplugged). The current
vhost_user_set_mem_table implementation is therefore wrong for the
hotplug/hot-unplug case: it frees all current memory regions and
unmaps their hugepages unconditionally, regardless of whether they
are still in use and of whether they were already initialized and
mapped by a previous VHOST_USER_SET_MEM_TABLE message.
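
For reference, the payload of that message has the following layout
(shown here as defined in lib/librte_vhost/vhost_user.h):

  typedef struct VhostUserMemoryRegion {
          uint64_t guest_phys_addr;
          uint64_t memory_size;
          uint64_t userspace_addr;
          uint64_t mmap_offset;
  } VhostUserMemoryRegion;

  typedef struct VhostUserMemory {
          uint32_t nregions;
          uint32_t padding;
          VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
  } VhostUserMemory;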

This commit fixes the issue by diffing the old and new region lists:
memory regions that are unchanged are kept intact, regions that have
been hot-unplugged are unmapped, and newly hotplugged regions are
initialized and their hugepages mapped.
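
In outline, the diff works as in the standalone sketch below. The
types and the unmap_region() helper are simplified stand-ins for
illustration only, not the real vhost structs; unmap_region() stands
in for the munmap()/close() pair in the patch:

  #include <inttypes.h>
  #include <stdio.h>

  struct region { uint64_t guest_user_addr; };

  static void unmap_region(struct region *r)
  {
          /* The real code does munmap(r->mmap_addr, r->mmap_size)
           * and close(r->fd) here. */
          printf("unmap region at 0x%" PRIx64 "\n", r->guest_user_addr);
          r->guest_user_addr = 0;
  }

  int main(void)
  {
          /* Old table has three regions; the new message lacks the
           * middle one, i.e. it was hot-unplugged. */
          struct region old[] = { { 0x1000 }, { 0x2000 }, { 0x3000 } };
          struct region new_msg[] = { { 0x1000 }, { 0x3000 } };
          const uint32_t n_old = 3, n_new = 2;
          uint32_t i = 0, j = 0;

          while (i < n_new && j < n_old) {
                  if (old[j].guest_user_addr !=
                      new_msg[i].guest_user_addr) {
                          unmap_region(&old[j]);  /* hot-unplugged */
                          j++;
                  } else {
                          i++;    /* unchanged: keep the mapping */
                          j++;
                  }
          }
          while (j < n_old)
                  unmap_region(&old[j++]);  /* removals at the tail */

          return 0;
  }

Note that this pairing assumes both lists appear in the same
guest-address order, with removed regions simply missing from the new
list, which is how the patch below treats them.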

Every VHOST_USER_SET_MEM_TABLE message includes all the currently
effective memory regions: hotplugged regions are included, while
hot-unplugged regions are not.
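
For example (region names are illustrative): if the guest boots with
two regions, then a DIMM is hotplugged and later hot-unplugged, the
successive messages look like:

  boot:       nregions = 2, regions = {R0, R1}
  hotplug:    nregions = 3, regions = {R0, R1, R2}
  hot-unplug: nregions = 2, regions = {R0, R1}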

This has been verified with OVS-DPDK by hotplugging and
hot-unplugging memory in QEMU.

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
---
 lib/librte_vhost/vhost_user.c | 72 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 7 deletions(-)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index ad2e8d3..1c475d1 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -509,17 +509,43 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 	uint64_t mmap_size;
 	uint64_t mmap_offset;
 	uint64_t alignment;
-	uint32_t i;
+	uint32_t i, j;
 	int fd;
 
-	if (dev->mem) {
-		free_mem_region(dev);
-		rte_free(dev->mem);
-		dev->mem = NULL;
+	/* Handle hot unplug case */
+	if (dev->mem && dev->mem->nregions) {
+		i = 0;
+		j = 0;
+		while ((i < memory.nregions) && (j < dev->mem->nregions)) {
+			reg = &dev->mem->regions[j];
+			/* munmap this region if it is hot unplugged */
+			if (reg->guest_user_addr
+				!= memory.regions[i].userspace_addr) {
+				if (reg->host_user_addr) {
+					munmap(reg->mmap_addr, reg->mmap_size);
+					close(reg->fd);
+				}
+				reg->guest_user_addr = 0;
+				j++;
+			} else {
+				i++;
+				j++;
+			}
+		}
+
+		/* munmap these regions because they have been hot unplugged */
+		for (; j < dev->mem->nregions; j++) {
+			reg = &dev->mem->regions[j];
+			if (reg->host_user_addr) {
+				munmap(reg->mmap_addr, reg->mmap_size);
+				close(reg->fd);
+			}
+			reg->guest_user_addr = 0;
+		}
 	}
 
-	dev->nr_guest_pages = 0;
 	if (!dev->guest_pages) {
+		dev->nr_guest_pages = 0;
 		dev->max_guest_pages = 8;
 		dev->guest_pages = malloc(dev->max_guest_pages *
 						sizeof(struct guest_page));
@@ -532,7 +558,7 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 		}
 	}
 
-	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
+	dev->mem = rte_realloc(dev->mem, sizeof(struct rte_vhost_memory) +
 		sizeof(struct rte_vhost_mem_region) * memory.nregions, 0);
 	if (dev->mem == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
@@ -546,6 +572,38 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 		fd  = pmsg->fds[i];
 		reg = &dev->mem->regions[i];
 
+		/* Skip this region if it was already initialized */
+		if (reg->guest_user_addr == memory.regions[i].userspace_addr) {
+			alignment = get_blk_size(fd);
+			if (alignment == (uint64_t)-1) {
+				RTE_LOG(ERR, VHOST_CONFIG,
+					"couldn't get hugepage size "
+					"through fstat\n");
+				goto err_mmap;
+			}
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"guest memory region(old)"
+				" %u, size: 0x%" PRIx64 "\n"
+				"\t guest physical addr: 0x%" PRIx64 "\n"
+				"\t guest virtual  addr: 0x%" PRIx64 "\n"
+				"\t host  virtual  addr: 0x%" PRIx64 "\n"
+				"\t mmap addr : 0x%" PRIx64 "\n"
+				"\t mmap size : 0x%" PRIx64 "\n"
+				"\t mmap align: 0x%" PRIx64 "\n"
+				"\t mmap off  : 0x%" PRIx64 "\n",
+				i, reg->size,
+				reg->guest_phys_addr,
+				reg->guest_user_addr,
+				reg->host_user_addr,
+				(uint64_t)(uintptr_t)reg->mmap_addr,
+				reg->mmap_size,
+				alignment,
+				(uint64_t)(uintptr_t)reg->host_user_addr -
+					(uint64_t)(uintptr_t)reg->mmap_addr
+				);
+			continue;
+		}
+
 		reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
 		reg->guest_user_addr = memory.regions[i].userspace_addr;
 		reg->size            = memory.regions[i].memory_size;
-- 
2.1.0


Thread overview: 4+ messages
2017-09-20  8:32 Yi Yang [this message]
2017-09-22  5:28 ` Tan, Jianfeng
2017-09-25  2:29   ` Yang, Yi
2017-09-25  4:46 ` Tiwei Bie
