DPDK patches and discussions
 help / color / mirror / Atom feed
From: Nikos Dragazis <ndragazis@arrikto.com>
To: dev@dpdk.org
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>,
	Tiwei Bie <tiwei.bie@intel.com>,
	Zhihong Wang <zhihong.wang@intel.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Wei Wang <wei.w.wang@intel.com>,
	Stojaczyk Dariusz <dariusz.stojaczyk@intel.com>,
	Vangelis Koukis <vkoukis@arrikto.com>
Subject: [dpdk-dev] [PATCH 16/28] vhost: move postcopy live migration code
Date: Wed, 19 Jun 2019 18:14:41 +0300	[thread overview]
Message-ID: <1560957293-17294-17-git-send-email-ndragazis@arrikto.com> (raw)
In-Reply-To: <1560957293-17294-1-git-send-email-ndragazis@arrikto.com>

Postcopy live migration is an AF_UNIX-bound feature because it relies on
the userfaultfd mechanism: the userfault file descriptor has to be handed
to the master, which requires fd passing over a Unix domain socket.
Therefore, this patch moves the relevant code from vhost_user.c to
trans_af_unix.c and exposes this functionality via transport-specific
functions. Any other vhost-user transport could potentially support this
feature by implementing these transport-specific functions.

Signed-off-by: Nikos Dragazis <ndragazis@arrikto.com>
---
 lib/librte_vhost/trans_af_unix.c | 94 ++++++++++++++++++++++++++++++++++++++--
 lib/librte_vhost/vhost.c         |  1 -
 lib/librte_vhost/vhost.h         | 41 ++++++++++++++++--
 lib/librte_vhost/vhost_user.c    | 61 ++------------------------
 4 files changed, 131 insertions(+), 66 deletions(-)

diff --git a/lib/librte_vhost/trans_af_unix.c b/lib/librte_vhost/trans_af_unix.c
index a451880..4ccf9a7 100644
--- a/lib/librte_vhost/trans_af_unix.c
+++ b/lib/librte_vhost/trans_af_unix.c
@@ -10,6 +10,7 @@
 #include <sys/un.h>
 #include <sys/types.h>
 #include <sys/ioctl.h>
+#include <sys/syscall.h>
 #ifdef RTE_LIBRTE_VHOST_POSTCOPY
 #include <linux/userfaultfd.h>
 #endif
@@ -39,6 +40,9 @@ struct vhost_user_connection {
 	int slave_req_fd;
 	rte_spinlock_t slave_req_lock;
 
+	int postcopy_ufd;
+	int postcopy_listening;
+
 	TAILQ_ENTRY(vhost_user_connection) next;
 };
 
@@ -261,6 +265,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	conn->slave_req_fd = -1;
 	conn->vsocket = vsocket;
 	rte_spinlock_init(&conn->slave_req_lock);
+	conn->postcopy_ufd = -1;
 
 	size = strnlen(vsocket->path, PATH_MAX);
 	vhost_set_ifname(dev->vid, vsocket->path, size);
@@ -772,6 +777,13 @@ af_unix_cleanup_device(struct virtio_net *dev, int destroy __rte_unused)
 		close(conn->slave_req_fd);
 		conn->slave_req_fd = -1;
 	}
+
+	if (conn->postcopy_ufd >= 0) {
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+	}
+
+	conn->postcopy_listening = 0;
 }
 
 static int
@@ -866,7 +878,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
 			alignment,
 			mmap_offset);
 
-		if (dev->postcopy_listening) {
+		if (conn->postcopy_listening) {
 			/*
 			 * We haven't a better way right now than sharing
 			 * DPDK's virtual address with Qemu, so that Qemu can
@@ -877,7 +889,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
 		}
 	}
 
-	if (dev->postcopy_listening) {
+	if (conn->postcopy_listening) {
 		/* Send the addresses back to qemu */
 		msg->fd_num = 0;
 		/* Send reply */
@@ -918,11 +930,11 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
 			reg_struct.range.len = reg->mmap_size;
 			reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 
-			if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
+			if (ioctl(conn->postcopy_ufd, UFFDIO_REGISTER,
 						&reg_struct)) {
 				RTE_LOG(ERR, VHOST_CONFIG,
 					"Failed to register ufd for region %d: (ufd = %d) %s\n",
-					i, dev->postcopy_ufd,
+					i, conn->postcopy_ufd,
 					strerror(errno));
 				return -1;
 			}
@@ -990,6 +1002,77 @@ af_unix_set_log_base(struct virtio_net *dev, const struct VhostUserMsg *msg)
 	return 0;
 }
 
+static int
+af_unix_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+	struct uffdio_api api_struct;
+
+	conn->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+	if (conn->postcopy_ufd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+			strerror(errno));
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	api_struct.api = UFFD_API;
+	api_struct.features = 0;
+	if (ioctl(conn->postcopy_ufd, UFFDIO_API, &api_struct)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+			strerror(errno));
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	msg->fds[0] = conn->postcopy_ufd;
+	msg->fd_num = 1;
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+#else
+	conn->postcopy_ufd = -1;
+	msg->fd_num = 0;
+
+	return RTE_VHOST_MSG_RESULT_ERR;
+#endif
+}
+
+static int
+af_unix_set_postcopy_listen(struct virtio_net *dev)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+
+	if (dev->mem && dev->mem->nregions) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Regions already registered at postcopy-listen\n");
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	conn->postcopy_listening = 1;
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+af_unix_set_postcopy_end(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+
+	conn->postcopy_listening = 0;
+	if (conn->postcopy_ufd >= 0) {
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+	}
+
+	msg->payload.u64 = 0;
+	msg->size = sizeof(msg->payload.u64);
+	msg->fd_num = 0;
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
 const struct vhost_transport_ops af_unix_trans_ops = {
 	.socket_size = sizeof(struct af_unix_socket),
 	.device_size = sizeof(struct vhost_user_connection),
@@ -1005,4 +1088,7 @@ const struct vhost_transport_ops af_unix_trans_ops = {
 	.map_mem_regions = af_unix_map_mem_regions,
 	.unmap_mem_regions = af_unix_unmap_mem_regions,
 	.set_log_base = af_unix_set_log_base,
+	.set_postcopy_advise = af_unix_set_postcopy_advise,
+	.set_postcopy_listen = af_unix_set_postcopy_listen,
+	.set_postcopy_end = af_unix_set_postcopy_end,
 };
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 5b16390..91a286d 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -512,7 +512,6 @@ vhost_new_device(const struct vhost_transport_ops *trans_ops)
 	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
 	dev->trans_ops = trans_ops;
 	dev->vdpa_dev_id = -1;
-	dev->postcopy_ufd = -1;
 
 	return dev;
 }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index b15d223..f5d6dc8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -450,6 +450,44 @@ struct vhost_transport_ops {
 	 */
 	int (*set_log_base)(struct virtio_net *dev,
 			    const struct VhostUserMsg *msg);
+
+	/**
+	 * Set up a userfault fd and send it to master.
+	 *
+	 * @param dev
+	 *  vhost device
+	 * @param msg
+	 *  message
+	 * @return
+	 *  RTE_VHOST_MSG_RESULT_REPLY on success,
+	 *  RTE_VHOST_MSG_RESULT_ERR on failure
+	 */
+	int (*set_postcopy_advise)(struct virtio_net *dev,
+				   struct VhostUserMsg *msg);
+
+	/**
+	 * Change live migration mode (entering postcopy mode).
+	 *
+	 * @param dev
+	 *  vhost device
+	 * @return
+	 *  RTE_VHOST_MSG_RESULT_OK on success,
+	 *  RTE_VHOST_MSG_RESULT_ERR on failure
+	 */
+	int (*set_postcopy_listen)(struct virtio_net *dev);
+
+	/**
+	 * Register completion of postcopy live migration.
+	 *
+	 * @param dev
+	 *  vhost device
+	 * @param msg
+	 *  message
+	 * @return
+	 *  RTE_VHOST_MSG_RESULT_REPLY
+	 */
+	int (*set_postcopy_end)(struct virtio_net *dev,
+				struct VhostUserMsg *msg);
 };
 
 /** The traditional AF_UNIX vhost-user protocol transport. */
@@ -492,9 +530,6 @@ struct virtio_net {
 	uint32_t		max_guest_pages;
 	struct guest_page       *guest_pages;
 
-	int			postcopy_ufd;
-	int			postcopy_listening;
-
 	/*
 	 * Device id to identify a specific backend device.
 	 * It's set to -1 for the default software implementation.
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index d3c9c5f..29c99e7 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -29,14 +29,10 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/syscall.h>
 #include <assert.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
 #include <numaif.h>
 #endif
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
-#include <linux/userfaultfd.h>
-#endif
 
 #include <rte_common.h>
 #include <rte_malloc.h>
@@ -136,13 +132,6 @@ vhost_backend_cleanup(struct virtio_net *dev)
 
 	free(dev->guest_pages);
 	dev->guest_pages = NULL;
-
-	if (dev->postcopy_ufd >= 0) {
-		close(dev->postcopy_ufd);
-		dev->postcopy_ufd = -1;
-	}
-
-	dev->postcopy_listening = 0;
 }
 
 /*
@@ -1471,35 +1460,8 @@ vhost_user_set_postcopy_advise(struct virtio_net **pdev,
 			struct VhostUserMsg *msg)
 {
 	struct virtio_net *dev = *pdev;
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
-	struct uffdio_api api_struct;
-
-	dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-
-	if (dev->postcopy_ufd == -1) {
-		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
-			strerror(errno));
-		return RTE_VHOST_MSG_RESULT_ERR;
-	}
-	api_struct.api = UFFD_API;
-	api_struct.features = 0;
-	if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
-		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
-			strerror(errno));
-		close(dev->postcopy_ufd);
-		dev->postcopy_ufd = -1;
-		return RTE_VHOST_MSG_RESULT_ERR;
-	}
-	msg->fds[0] = dev->postcopy_ufd;
-	msg->fd_num = 1;
-
-	return RTE_VHOST_MSG_RESULT_REPLY;
-#else
-	dev->postcopy_ufd = -1;
-	msg->fd_num = 0;
 
-	return RTE_VHOST_MSG_RESULT_ERR;
-#endif
+	return dev->trans_ops->set_postcopy_advise(dev, msg);
 }
 
 static int
@@ -1508,14 +1470,7 @@ vhost_user_set_postcopy_listen(struct virtio_net **pdev,
 {
 	struct virtio_net *dev = *pdev;
 
-	if (dev->mem && dev->mem->nregions) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Regions already registered at postcopy-listen\n");
-		return RTE_VHOST_MSG_RESULT_ERR;
-	}
-	dev->postcopy_listening = 1;
-
-	return RTE_VHOST_MSG_RESULT_OK;
+	return dev->trans_ops->set_postcopy_listen(dev);
 }
 
 static int
@@ -1523,17 +1478,7 @@ vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg)
 {
 	struct virtio_net *dev = *pdev;
 
-	dev->postcopy_listening = 0;
-	if (dev->postcopy_ufd >= 0) {
-		close(dev->postcopy_ufd);
-		dev->postcopy_ufd = -1;
-	}
-
-	msg->payload.u64 = 0;
-	msg->size = sizeof(msg->payload.u64);
-	msg->fd_num = 0;
-
-	return RTE_VHOST_MSG_RESULT_REPLY;
+	return dev->trans_ops->set_postcopy_end(dev, msg);
 }
 
 typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
-- 
2.7.4


  parent reply	other threads:[~2019-06-19 15:21 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-19 15:14 [dpdk-dev] [PATCH 00/28] vhost: add virtio-vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 01/28] vhost: introduce vhost transport operations structure Nikos Dragazis
2019-06-19 20:14   ` Aaron Conole
2019-06-20 10:30     ` Bruce Richardson
2019-06-20 18:24       ` Nikos Dragazis
2019-06-20 18:19     ` Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 02/28] vhost: move socket management code Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 03/28] vhost: allocate per-socket transport state Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 04/28] vhost: move socket fd and un sockaddr Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 05/28] vhost: move start server/client calls Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 06/28] vhost: move vhost-user connection Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 07/28] vhost: move vhost-user reconnection Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 08/28] vhost: move vhost-user fdset Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 09/28] vhost: propagate vhost transport operations Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 10/28] vhost: use a single structure for the device state Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 11/28] vhost: extract socket I/O into transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 12/28] vhost: move slave request fd and lock Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 13/28] vhost: move mmap/munmap Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 14/28] vhost: move setup of the log memory region Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 15/28] vhost: remove main fd parameter from msg handlers Nikos Dragazis
2019-06-19 15:14 ` Nikos Dragazis [this message]
2019-06-19 15:14 ` [dpdk-dev] [PATCH 17/28] vhost: support registering additional vhost-user transports Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 18/28] drivers/virtio_vhost_user: add virtio PCI framework Nikos Dragazis
2019-09-05 16:34   ` Maxime Coquelin
2019-09-09  8:42     ` Nikos Dragazis
2019-09-09  8:44       ` Maxime Coquelin
2019-06-19 15:14 ` [dpdk-dev] [PATCH 19/28] vhost: add index field in vhost virtqueues Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 20/28] drivers: add virtio-vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 21/28] drivers/virtio_vhost_user: use additional device resources Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 22/28] vhost: add flag for choosing vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 23/28] net/vhost: add virtio-vhost-user support Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 24/28] examples/vhost_scsi: add --socket-file argument Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 25/28] examples/vhost_scsi: add virtio-vhost-user support Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 26/28] mk: link apps with virtio-vhost-user driver Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 27/28] config: add option for the virtio-vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 28/28] usertools: add virtio-vhost-user devices to dpdk-devbind.py Nikos Dragazis
     [not found] ` <CGME20190620113240eucas1p22ca4faa64a36bbb7aec38a81298ade56@eucas1p2.samsung.com>
2019-06-20 11:32   ` [dpdk-dev] [PATCH 00/28] vhost: add virtio-vhost-user transport Ilya Maximets
2019-06-20 23:44     ` Nikos Dragazis
2019-06-20 11:35 ` Maxime Coquelin
2019-06-22 20:26   ` Nikos Dragazis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1560957293-17294-17-git-send-email-ndragazis@arrikto.com \
    --to=ndragazis@arrikto.com \
    --cc=dariusz.stojaczyk@intel.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=tiwei.bie@intel.com \
    --cc=vkoukis@arrikto.com \
    --cc=wei.w.wang@intel.com \
    --cc=zhihong.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).