DPDK patches and discussions
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: dev@dpdk.org
Cc: maxime.coquelin@redhat.com, Yuanhan Liu <yliu@fridaylinux.org>,
	wei.w.wang@intel.com, mst@redhat.com, zhiyong.yang@intel.com,
	jasowang@redhat.com, Stefan Hajnoczi <stefanha@redhat.com>
Subject: [dpdk-dev] [RFC 13/24] vhost: move mmap/munmap to AF_UNIX transport
Date: Fri, 19 Jan 2018 13:44:33 +0000	[thread overview]
Message-ID: <20180119134444.24927-14-stefanha@redhat.com> (raw)
In-Reply-To: <20180119134444.24927-1-stefanha@redhat.com>

How mem table regions are mapped is transport-specific, so move the mmap
code into trans_af_unix.c.  The new .map_mem_regions()/.unmap_mem_regions()
interfaces allow transports to perform the mapping and unmapping.

Drop the "mmap align:" debug output because the alignment is no longer
available from vhost_user.c.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 lib/librte_vhost/vhost.h         | 17 +++++++
 lib/librte_vhost/vhost_user.h    |  3 ++
 lib/librte_vhost/trans_af_unix.c | 78 +++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost_user.c    | 95 ++++++++++------------------------------
 4 files changed, 121 insertions(+), 72 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 60e4d10bd..a50b802e7 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -315,6 +315,23 @@ struct vhost_transport_ops {
 	 */
 	int (*set_slave_req_fd)(struct virtio_net *dev,
 				struct VhostUserMsg *msg);
+
+	/**
+	 * Map memory table regions in dev->mem->regions[].
+	 *
+	 * @param dev
+	 *  vhost device
+	 */
+	int (*map_mem_regions)(struct virtio_net *dev);
+
+	/**
+	 * Unmap memory table regions in dev->mem->regions[] and free any
+	 * resources, such as file descriptors.
+	 *
+	 * @param dev
+	 *  vhost device
+	 */
+	void (*unmap_mem_regions)(struct virtio_net *dev);
 };
 
 /** The traditional AF_UNIX vhost-user protocol transport. */
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index dec658dff..4181f34c9 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -110,6 +110,9 @@ typedef struct VhostUserMsg {
 
 
 /* vhost_user.c */
+void vhost_add_guest_pages(struct virtio_net *dev,
+			   struct rte_vhost_mem_region *reg,
+			   uint64_t page_size);
 int vhost_user_msg_handler(int vid, const struct VhostUserMsg *msg);
 int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
 
diff --git a/lib/librte_vhost/trans_af_unix.c b/lib/librte_vhost/trans_af_unix.c
index 7128e121e..d3a5519b7 100644
--- a/lib/librte_vhost/trans_af_unix.c
+++ b/lib/librte_vhost/trans_af_unix.c
@@ -34,6 +34,8 @@
  */
 
 #include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
 #include <sys/un.h>
 #include <fcntl.h>
 
@@ -703,6 +705,80 @@ af_unix_vring_call(struct virtio_net *dev __rte_unused,
 	return 0;
 }
 
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat stat;
+	int ret;
+
+	ret = fstat(fd, &stat);
+	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
+}
+
+static int
+af_unix_map_mem_regions(struct virtio_net *dev)
+{
+	uint32_t i;
+
+	for (i = 0; i < dev->mem->nregions; i++) {
+		struct rte_vhost_mem_region *reg = &dev->mem->regions[i];
+		uint64_t mmap_size = reg->mmap_size;
+		uint64_t mmap_offset = mmap_size - reg->size;
+		uint64_t alignment;
+		void *mmap_addr;
+
+		/* mmap() without flag of MAP_ANONYMOUS, should be called
+		 * with length argument aligned with hugepagesz at older
+		 * longterm version Linux, like 2.6.32 and 3.2.72, or
+		 * mmap() will fail with EINVAL.
+		 *
+		 * to avoid failure, make sure in caller to keep length
+		 * aligned.
+		 */
+		alignment = get_blk_size(reg->fd);
+		if (alignment == (uint64_t)-1) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"couldn't get hugepage size through fstat\n");
+			return -1;
+		}
+		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
+
+		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_POPULATE, reg->fd, 0);
+
+		if (mmap_addr == MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap region %u failed.\n", i);
+			return -1;
+		}
+
+		reg->mmap_addr = mmap_addr;
+		reg->mmap_size = mmap_size;
+		reg->host_user_addr = (uint64_t)(uintptr_t)reg->mmap_addr +
+				      mmap_offset;
+
+		if (dev->dequeue_zero_copy)
+			vhost_add_guest_pages(dev, reg, alignment);
+	}
+
+	return 0;
+}
+
+static void
+af_unix_unmap_mem_regions(struct virtio_net *dev)
+{
+	uint32_t i;
+	struct rte_vhost_mem_region *reg;
+
+	for (i = 0; i < dev->mem->nregions; i++) {
+		reg = &dev->mem->regions[i];
+		if (reg->host_user_addr) {
+			munmap(reg->mmap_addr, reg->mmap_size);
+			close(reg->fd);
+		}
+	}
+}
+
 const struct vhost_transport_ops af_unix_trans_ops = {
 	.socket_size = sizeof(struct af_unix_socket),
 	.device_size = sizeof(struct vhost_user_connection),
@@ -714,4 +790,6 @@ const struct vhost_transport_ops af_unix_trans_ops = {
 	.send_reply = af_unix_send_reply,
 	.send_slave_req = af_unix_send_slave_req,
 	.set_slave_req_fd = af_unix_set_slave_req_fd,
+	.map_mem_regions = af_unix_map_mem_regions,
+	.unmap_mem_regions = af_unix_unmap_mem_regions,
 };
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index ee1b0a1a2..a819684b4 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -52,32 +52,13 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
 };
 
-static uint64_t
-get_blk_size(int fd)
-{
-	struct stat stat;
-	int ret;
-
-	ret = fstat(fd, &stat);
-	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
-}
-
 static void
 free_mem_region(struct virtio_net *dev)
 {
-	uint32_t i;
-	struct rte_vhost_mem_region *reg;
-
 	if (!dev || !dev->mem)
 		return;
 
-	for (i = 0; i < dev->mem->nregions; i++) {
-		reg = &dev->mem->regions[i];
-		if (reg->host_user_addr) {
-			munmap(reg->mmap_addr, reg->mmap_size);
-			close(reg->fd);
-		}
-	}
+	dev->trans_ops->unmap_mem_regions(dev);
 }
 
 void
@@ -516,9 +497,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page->size = size;
 }
 
-static void
-add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
-		uint64_t page_size)
+void
+vhost_add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
+		      uint64_t page_size)
 {
 	uint64_t reg_size = reg->size;
 	uint64_t host_user_addr  = reg->host_user_addr;
@@ -602,19 +583,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 {
 	struct VhostUserMemory memory = pmsg->payload.memory;
 	struct rte_vhost_mem_region *reg;
-	void *mmap_addr;
-	uint64_t mmap_size;
-	uint64_t mmap_offset;
-	uint64_t alignment;
 	uint32_t i;
-	int fd;
 
 	if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) {
 		RTE_LOG(INFO, VHOST_CONFIG,
 			"(%d) memory regions not changed\n", dev->vid);
 
-		for (i = 0; i < memory.nregions; i++)
-			close(pmsg->fds[i]);
+		for (i = 0; i < memory.nregions; i++) {
+			if (pmsg->fds[i] >= 0) {
+				close(pmsg->fds[i]);
+			}
+		}
 
 		return 0;
 	}
@@ -649,50 +628,24 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 	}
 	dev->mem->nregions = memory.nregions;
 
+	/* Fill in dev->mem->regions[] */
 	for (i = 0; i < memory.nregions; i++) {
-		fd  = pmsg->fds[i];
 		reg = &dev->mem->regions[i];
 
 		reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
 		reg->guest_user_addr = memory.regions[i].userspace_addr;
 		reg->size            = memory.regions[i].memory_size;
-		reg->fd              = fd;
+		reg->mmap_size       = reg->size + memory.regions[i].mmap_offset;
+		reg->mmap_addr       = NULL;
+		reg->host_user_addr  = 0;
+		reg->fd              = pmsg->fds[i];
+	}
 
-		mmap_offset = memory.regions[i].mmap_offset;
-		mmap_size   = reg->size + mmap_offset;
+	if (dev->trans_ops->map_mem_regions(dev) < 0)
+		goto err;
 
-		/* mmap() without flag of MAP_ANONYMOUS, should be called
-		 * with length argument aligned with hugepagesz at older
-		 * longterm version Linux, like 2.6.32 and 3.2.72, or
-		 * mmap() will fail with EINVAL.
-		 *
-		 * to avoid failure, make sure in caller to keep length
-		 * aligned.
-		 */
-		alignment = get_blk_size(fd);
-		if (alignment == (uint64_t)-1) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"couldn't get hugepage size through fstat\n");
-			goto err_mmap;
-		}
-		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
-
-		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
-				 MAP_SHARED | MAP_POPULATE, fd, 0);
-
-		if (mmap_addr == MAP_FAILED) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"mmap region %u failed.\n", i);
-			goto err_mmap;
-		}
-
-		reg->mmap_addr = mmap_addr;
-		reg->mmap_size = mmap_size;
-		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
-				      mmap_offset;
-
-		if (dev->dequeue_zero_copy)
-			add_guest_pages(dev, reg, alignment);
+	for (i = 0; i < memory.nregions; i++) {
+		reg = &dev->mem->regions[i];
 
 		RTE_LOG(INFO, VHOST_CONFIG,
 			"guest memory region %u, size: 0x%" PRIx64 "\n"
@@ -701,23 +654,21 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 			"\t host  virtual  addr: 0x%" PRIx64 "\n"
 			"\t mmap addr : 0x%" PRIx64 "\n"
 			"\t mmap size : 0x%" PRIx64 "\n"
-			"\t mmap align: 0x%" PRIx64 "\n"
 			"\t mmap off  : 0x%" PRIx64 "\n",
 			i, reg->size,
 			reg->guest_phys_addr,
 			reg->guest_user_addr,
 			reg->host_user_addr,
-			(uint64_t)(uintptr_t)mmap_addr,
-			mmap_size,
-			alignment,
-			mmap_offset);
+			(uint64_t)(uintptr_t)reg->mmap_addr,
+			reg->mmap_size,
+			memory.regions[i].mmap_offset);
 	}
 
 	dump_guest_pages(dev);
 
 	return 0;
 
-err_mmap:
+err:
 	free_mem_region(dev);
 	rte_free(dev->mem);
 	dev->mem = NULL;
-- 
2.14.3

  parent reply	other threads:[~2018-01-19 13:47 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-19 13:44 [dpdk-dev] [RFC 00/24] vhost: add virtio-vhost-user transport Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 01/24] vhost: move vring_call() into trans_af_unix.c Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 02/24] vhost: move AF_UNIX code from socket.c to trans_af_unix.c Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 03/24] vhost: allocate per-socket transport state Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 04/24] vhost: move socket_fd and un sockaddr into trans_af_unix.c Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 05/24] vhost: move start_server/client() calls to trans_af_unix.c Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 06/24] vhost: move vhost_user_connection " Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 07/24] vhost: move vhost_user_reconnect_init() into trans_af_unix.c Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 08/24] vhost: move vhost_user.fdset to trans_af_unix.c Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 09/24] vhost: pass vhost_transport_ops through vhost_new_device() Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 10/24] vhost: embed struct virtio_net inside struct vhost_user_connection Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 11/24] vhost: extract vhost_user.c socket I/O into transport Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 12/24] vhost: move slave_req_fd field to AF_UNIX transport Stefan Hajnoczi
2018-01-19 13:44 ` Stefan Hajnoczi [this message]
2018-01-19 13:44 ` [dpdk-dev] [RFC 14/24] vhost: move librte_vhost to drivers/ Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 15/24] vhost: add virtio pci framework Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 16/24] vhost: remember a vhost_virtqueue's queue index Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 17/24] vhost: add virtio-vhost-user transport Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 18/24] vhost: add RTE_VHOST_USER_VIRTIO_TRANSPORT flag Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 19/24] net/vhost: add virtio-vhost-user support Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 20/24] examples/vhost_scsi: add --socket-file argument Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 21/24] examples/vhost_scsi: add virtio-vhost-user support Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 22/24] usertools: add virtio-vhost-user devices to dpdk-devbind.py Stefan Hajnoczi
2018-01-19 13:44 ` [dpdk-dev] [RFC 23/24] WORKAROUND revert virtio-net mq vring deletion Stefan Hajnoczi
2018-01-30 17:52   ` Maxime Coquelin
2018-01-19 13:44 ` [dpdk-dev] [RFC 24/24] WORKAROUND examples/vhost_scsi: avoid broken EVENT_IDX Stefan Hajnoczi
2018-01-19 19:31   ` Michael S. Tsirkin
2018-01-31 10:02 ` [dpdk-dev] [RFC 00/24] vhost: add virtio-vhost-user transport Maxime Coquelin
     [not found] ` <20180410093847.GA22081@stefanha-x1.localdomain>
2018-04-10 14:56   ` Wang, Wei W

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180119134444.24927-14-stefanha@redhat.com \
    --to=stefanha@redhat.com \
    --cc=dev@dpdk.org \
    --cc=jasowang@redhat.com \
    --cc=maxime.coquelin@redhat.com \
    --cc=mst@redhat.com \
    --cc=wei.w.wang@intel.com \
    --cc=yliu@fridaylinux.org \
    --cc=zhiyong.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).