From: Nikos Dragazis <ndragazis@arrikto.com>
To: dev@dpdk.org
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>,
Tiwei Bie <tiwei.bie@intel.com>,
Zhihong Wang <zhihong.wang@intel.com>,
Stefan Hajnoczi <stefanha@redhat.com>,
Wei Wang <wei.w.wang@intel.com>,
Stojaczyk Dariusz <dariusz.stojaczyk@intel.com>,
Vangelis Koukis <vkoukis@arrikto.com>
Subject: [dpdk-dev] [PATCH 16/28] vhost: move postcopy live migration code
Date: Wed, 19 Jun 2019 18:14:41 +0300 [thread overview]
Message-ID: <1560957293-17294-17-git-send-email-ndragazis@arrikto.com> (raw)
In-Reply-To: <1560957293-17294-1-git-send-email-ndragazis@arrikto.com>
Postcopy live migration is an AF_UNIX-bound feature due to the
userfaultfd mechanism. Therefore, this patch moves the relevant code from
vhost_user.c to trans_af_unix.c and exposes this functionality via
transport-specific functions. Any other vhost-user transport
could potentially implement this feature by implementing these
transport-specific functions.
Signed-off-by: Nikos Dragazis <ndragazis@arrikto.com>
---
lib/librte_vhost/trans_af_unix.c | 94 ++++++++++++++++++++++++++++++++++++++--
lib/librte_vhost/vhost.c | 1 -
lib/librte_vhost/vhost.h | 41 ++++++++++++++++--
lib/librte_vhost/vhost_user.c | 61 ++------------------------
4 files changed, 131 insertions(+), 66 deletions(-)
diff --git a/lib/librte_vhost/trans_af_unix.c b/lib/librte_vhost/trans_af_unix.c
index a451880..4ccf9a7 100644
--- a/lib/librte_vhost/trans_af_unix.c
+++ b/lib/librte_vhost/trans_af_unix.c
@@ -10,6 +10,7 @@
#include <sys/un.h>
#include <sys/types.h>
#include <sys/ioctl.h>
+#include <sys/syscall.h>
#ifdef RTE_LIBRTE_VHOST_POSTCOPY
#include <linux/userfaultfd.h>
#endif
@@ -39,6 +40,9 @@ struct vhost_user_connection {
int slave_req_fd;
rte_spinlock_t slave_req_lock;
+ int postcopy_ufd;
+ int postcopy_listening;
+
TAILQ_ENTRY(vhost_user_connection) next;
};
@@ -261,6 +265,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
conn->slave_req_fd = -1;
conn->vsocket = vsocket;
rte_spinlock_init(&conn->slave_req_lock);
+ conn->postcopy_ufd = -1;
size = strnlen(vsocket->path, PATH_MAX);
vhost_set_ifname(dev->vid, vsocket->path, size);
@@ -772,6 +777,13 @@ af_unix_cleanup_device(struct virtio_net *dev, int destroy __rte_unused)
close(conn->slave_req_fd);
conn->slave_req_fd = -1;
}
+
+ if (conn->postcopy_ufd >= 0) {
+ close(conn->postcopy_ufd);
+ conn->postcopy_ufd = -1;
+ }
+
+ conn->postcopy_listening = 0;
}
static int
@@ -866,7 +878,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
alignment,
mmap_offset);
- if (dev->postcopy_listening) {
+ if (conn->postcopy_listening) {
/*
* We haven't a better way right now than sharing
* DPDK's virtual address with Qemu, so that Qemu can
@@ -877,7 +889,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
}
}
- if (dev->postcopy_listening) {
+ if (conn->postcopy_listening) {
/* Send the addresses back to qemu */
msg->fd_num = 0;
/* Send reply */
@@ -918,11 +930,11 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
reg_struct.range.len = reg->mmap_size;
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
+ if (ioctl(conn->postcopy_ufd, UFFDIO_REGISTER,
&reg_struct)) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to register ufd for region %d: (ufd = %d) %s\n",
- i, dev->postcopy_ufd,
+ i, conn->postcopy_ufd,
strerror(errno));
return -1;
}
@@ -990,6 +1002,77 @@ af_unix_set_log_base(struct virtio_net *dev, const struct VhostUserMsg *msg)
return 0;
}
+static int
+af_unix_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+ struct vhost_user_connection *conn =
+ container_of(dev, struct vhost_user_connection, device);
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+ struct uffdio_api api_struct;
+
+ conn->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+ if (conn->postcopy_ufd == -1) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+ strerror(errno));
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ api_struct.api = UFFD_API;
+ api_struct.features = 0;
+ if (ioctl(conn->postcopy_ufd, UFFDIO_API, &api_struct)) {
+ RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+ strerror(errno));
+ close(conn->postcopy_ufd);
+ conn->postcopy_ufd = -1;
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ msg->fds[0] = conn->postcopy_ufd;
+ msg->fd_num = 1;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+#else
+ conn->postcopy_ufd = -1;
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_ERR;
+#endif
+}
+
+static int
+af_unix_set_postcopy_listen(struct virtio_net *dev)
+{
+ struct vhost_user_connection *conn =
+ container_of(dev, struct vhost_user_connection, device);
+
+ if (dev->mem && dev->mem->nregions) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Regions already registered at postcopy-listen\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ conn->postcopy_listening = 1;
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+af_unix_set_postcopy_end(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+ struct vhost_user_connection *conn =
+ container_of(dev, struct vhost_user_connection, device);
+
+ conn->postcopy_listening = 0;
+ if (conn->postcopy_ufd >= 0) {
+ close(conn->postcopy_ufd);
+ conn->postcopy_ufd = -1;
+ }
+
+ msg->payload.u64 = 0;
+ msg->size = sizeof(msg->payload.u64);
+ msg->fd_num = 0;
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
const struct vhost_transport_ops af_unix_trans_ops = {
.socket_size = sizeof(struct af_unix_socket),
.device_size = sizeof(struct vhost_user_connection),
@@ -1005,4 +1088,7 @@ const struct vhost_transport_ops af_unix_trans_ops = {
.map_mem_regions = af_unix_map_mem_regions,
.unmap_mem_regions = af_unix_unmap_mem_regions,
.set_log_base = af_unix_set_log_base,
+ .set_postcopy_advise = af_unix_set_postcopy_advise,
+ .set_postcopy_listen = af_unix_set_postcopy_listen,
+ .set_postcopy_end = af_unix_set_postcopy_end,
};
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 5b16390..91a286d 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -512,7 +512,6 @@ vhost_new_device(const struct vhost_transport_ops *trans_ops)
dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
dev->trans_ops = trans_ops;
dev->vdpa_dev_id = -1;
- dev->postcopy_ufd = -1;
return dev;
}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index b15d223..f5d6dc8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -450,6 +450,44 @@ struct vhost_transport_ops {
*/
int (*set_log_base)(struct virtio_net *dev,
const struct VhostUserMsg *msg);
+
+ /**
+ * Register a userfault fd and send it to master.
+ *
+ * @param dev
+ * vhost device
+ * @param msg
+ * message
+ * @return
+ * RTE_VHOST_MSG_RESULT_REPLY on success,
+ * RTE_VHOST_MSG_RESULT_ERR on failure
+ */
+ int (*set_postcopy_advise)(struct virtio_net *dev,
+ struct VhostUserMsg *msg);
+
+ /**
+ * Change live migration mode (entering postcopy mode).
+ *
+ * @param dev
+ * vhost device
+ * @return
+ * RTE_VHOST_MSG_RESULT_OK on success,
+ * RTE_VHOST_MSG_RESULT_ERR on failure
+ */
+ int (*set_postcopy_listen)(struct virtio_net *dev);
+
+ /**
+ * Register completion of postcopy live migration.
+ *
+ * @param dev
+ * vhost device
+ * @param msg
+ * message
+ * @return
+ * RTE_VHOST_MSG_RESULT_REPLY
+ */
+ int (*set_postcopy_end)(struct virtio_net *dev,
+ struct VhostUserMsg *msg);
};
/** The traditional AF_UNIX vhost-user protocol transport. */
@@ -492,9 +530,6 @@ struct virtio_net {
uint32_t max_guest_pages;
struct guest_page *guest_pages;
- int postcopy_ufd;
- int postcopy_listening;
-
/*
* Device id to identify a specific backend device.
* It's set to -1 for the default software implementation.
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index d3c9c5f..29c99e7 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -29,14 +29,10 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/syscall.h>
#include <assert.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
-#include <linux/userfaultfd.h>
-#endif
#include <rte_common.h>
#include <rte_malloc.h>
@@ -136,13 +132,6 @@ vhost_backend_cleanup(struct virtio_net *dev)
free(dev->guest_pages);
dev->guest_pages = NULL;
-
- if (dev->postcopy_ufd >= 0) {
- close(dev->postcopy_ufd);
- dev->postcopy_ufd = -1;
- }
-
- dev->postcopy_listening = 0;
}
/*
@@ -1471,35 +1460,8 @@ vhost_user_set_postcopy_advise(struct virtio_net **pdev,
struct VhostUserMsg *msg)
{
struct virtio_net *dev = *pdev;
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
- struct uffdio_api api_struct;
-
- dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-
- if (dev->postcopy_ufd == -1) {
- RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
- strerror(errno));
- return RTE_VHOST_MSG_RESULT_ERR;
- }
- api_struct.api = UFFD_API;
- api_struct.features = 0;
- if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
- RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
- strerror(errno));
- close(dev->postcopy_ufd);
- dev->postcopy_ufd = -1;
- return RTE_VHOST_MSG_RESULT_ERR;
- }
- msg->fds[0] = dev->postcopy_ufd;
- msg->fd_num = 1;
-
- return RTE_VHOST_MSG_RESULT_REPLY;
-#else
- dev->postcopy_ufd = -1;
- msg->fd_num = 0;
- return RTE_VHOST_MSG_RESULT_ERR;
-#endif
+ return dev->trans_ops->set_postcopy_advise(dev, msg);
}
static int
@@ -1508,14 +1470,7 @@ vhost_user_set_postcopy_listen(struct virtio_net **pdev,
{
struct virtio_net *dev = *pdev;
- if (dev->mem && dev->mem->nregions) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Regions already registered at postcopy-listen\n");
- return RTE_VHOST_MSG_RESULT_ERR;
- }
- dev->postcopy_listening = 1;
-
- return RTE_VHOST_MSG_RESULT_OK;
+ return dev->trans_ops->set_postcopy_listen(dev);
}
static int
@@ -1523,17 +1478,7 @@ vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg)
{
struct virtio_net *dev = *pdev;
- dev->postcopy_listening = 0;
- if (dev->postcopy_ufd >= 0) {
- close(dev->postcopy_ufd);
- dev->postcopy_ufd = -1;
- }
-
- msg->payload.u64 = 0;
- msg->size = sizeof(msg->payload.u64);
- msg->fd_num = 0;
-
- return RTE_VHOST_MSG_RESULT_REPLY;
+ return dev->trans_ops->set_postcopy_end(dev, msg);
}
typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
--
2.7.4
next prev parent reply other threads:[~2019-06-19 15:21 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-19 15:14 [dpdk-dev] [PATCH 00/28] vhost: add virtio-vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 01/28] vhost: introduce vhost transport operations structure Nikos Dragazis
2019-06-19 20:14 ` Aaron Conole
2019-06-20 10:30 ` Bruce Richardson
2019-06-20 18:24 ` Nikos Dragazis
2019-06-20 18:19 ` Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 02/28] vhost: move socket management code Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 03/28] vhost: allocate per-socket transport state Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 04/28] vhost: move socket fd and un sockaddr Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 05/28] vhost: move start server/client calls Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 06/28] vhost: move vhost-user connection Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 07/28] vhost: move vhost-user reconnection Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 08/28] vhost: move vhost-user fdset Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 09/28] vhost: propagate vhost transport operations Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 10/28] vhost: use a single structure for the device state Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 11/28] vhost: extract socket I/O into transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 12/28] vhost: move slave request fd and lock Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 13/28] vhost: move mmap/munmap Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 14/28] vhost: move setup of the log memory region Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 15/28] vhost: remove main fd parameter from msg handlers Nikos Dragazis
2019-06-19 15:14 ` Nikos Dragazis [this message]
2019-06-19 15:14 ` [dpdk-dev] [PATCH 17/28] vhost: support registering additional vhost-user transports Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 18/28] drivers/virtio_vhost_user: add virtio PCI framework Nikos Dragazis
2019-09-05 16:34 ` Maxime Coquelin
2019-09-09 8:42 ` Nikos Dragazis
2019-09-09 8:44 ` Maxime Coquelin
2019-06-19 15:14 ` [dpdk-dev] [PATCH 19/28] vhost: add index field in vhost virtqueues Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 20/28] drivers: add virtio-vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 21/28] drivers/virtio_vhost_user: use additional device resources Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 22/28] vhost: add flag for choosing vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 23/28] net/vhost: add virtio-vhost-user support Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 24/28] examples/vhost_scsi: add --socket-file argument Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 25/28] examples/vhost_scsi: add virtio-vhost-user support Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 26/28] mk: link apps with virtio-vhost-user driver Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 27/28] config: add option for the virtio-vhost-user transport Nikos Dragazis
2019-06-19 15:14 ` [dpdk-dev] [PATCH 28/28] usertools: add virtio-vhost-user devices to dpdk-devbind.py Nikos Dragazis
[not found] ` <CGME20190620113240eucas1p22ca4faa64a36bbb7aec38a81298ade56@eucas1p2.samsung.com>
2019-06-20 11:32 ` [dpdk-dev] [PATCH 00/28] vhost: add virtio-vhost-user transport Ilya Maximets
2019-06-20 23:44 ` Nikos Dragazis
2019-06-20 11:35 ` Maxime Coquelin
2019-06-22 20:26 ` Nikos Dragazis
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1560957293-17294-17-git-send-email-ndragazis@arrikto.com \
--to=ndragazis@arrikto.com \
--cc=dariusz.stojaczyk@intel.com \
--cc=dev@dpdk.org \
--cc=maxime.coquelin@redhat.com \
--cc=stefanha@redhat.com \
--cc=tiwei.bie@intel.com \
--cc=vkoukis@arrikto.com \
--cc=wei.w.wang@intel.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).