* [dpdk-dev] [PATCH 01/12] vhost: define postcopy protocol flag
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
@ 2018-09-26 7:26 ` Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 02/12] vhost: add number of fds to vhost-user messages and use it Maxime Coquelin
` (10 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:26 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/rte_vhost.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index b02673d4a..b3cc6990d 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -66,6 +66,10 @@ extern "C" {
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
#endif
+#ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
+#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
+#endif
+
/** Indicate whether protocol features negotiation is supported. */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 02/12] vhost: add number of fds to vhost-user messages and use it
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 01/12] vhost: define postcopy protocol flag Maxime Coquelin
@ 2018-09-26 7:26 ` Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 03/12] vhost: enable fds passing when sending vhost-user messages Maxime Coquelin
` (9 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:26 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
As soon as some ancillary data (fds) is received, it is copied
without checking its length.
This patch adds the number of fds received to the message,
which is set in read_vhost_message().
This is preliminary work to support sending fds to Qemu.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/socket.c | 21 ++++++++++++++++-----
lib/librte_vhost/vhost_user.c | 2 +-
lib/librte_vhost/vhost_user.h | 4 +++-
3 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index d63031747..c04d3d305 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -94,18 +94,23 @@ static struct vhost_user vhost_user = {
.mutex = PTHREAD_MUTEX_INITIALIZER,
};
-/* return bytes# of read on success or negative val on failure. */
+/*
+ * return bytes# of read on success or negative val on failure. Update fdnum
+ * with number of fds read.
+ */
int
-read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
+ int *fd_num)
{
struct iovec iov;
struct msghdr msgh;
- size_t fdsize = fd_num * sizeof(int);
- char control[CMSG_SPACE(fdsize)];
+ char control[CMSG_SPACE(max_fds * sizeof(int))];
struct cmsghdr *cmsg;
int got_fds = 0;
int ret;
+ *fd_num = 0;
+
memset(&msgh, 0, sizeof(msgh));
iov.iov_base = buf;
iov.iov_len = buflen;
@@ -131,13 +136,19 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
if ((cmsg->cmsg_level == SOL_SOCKET) &&
(cmsg->cmsg_type == SCM_RIGHTS)) {
got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ if (got_fds > max_fds) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Received msg contains more fds than supported\n");
+ return -1;
+ }
+ *fd_num = got_fds;
memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
break;
}
}
/* Clear out unused file descriptors */
- while (got_fds < fd_num)
+ while (got_fds < max_fds)
fds[got_fds++] = -1;
return ret;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 63d145b2d..a07b26d99 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1418,7 +1418,7 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
int ret;
ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
- msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+ msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num);
if (ret <= 0)
return ret;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 42166adf2..dd0262f8f 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -132,6 +132,7 @@ typedef struct VhostUserMsg {
VhostUserVringArea area;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
+ int fd_num;
} __attribute((packed)) VhostUserMsg;
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
@@ -146,7 +147,8 @@ int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
int vhost_user_host_notifier_ctrl(int vid, bool enable);
/* socket.c */
-int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
+int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
+ int *fd_num);
int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
#endif
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 03/12] vhost: enable fds passing when sending vhost-user messages
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 01/12] vhost: define postcopy protocol flag Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 02/12] vhost: add number of fds to vhost-user messages and use it Maxime Coquelin
@ 2018-09-26 7:26 ` Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 04/12] vhost: introduce postcopy's advise message Maxime Coquelin
` (8 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:26 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Passing userfault fds to Qemu will be required for the postcopy
live-migration feature.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost_user.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index a07b26d99..a9b429598 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1171,6 +1171,7 @@ vhost_user_get_protocol_features(struct virtio_net *dev,
msg->payload.u64 = protocol_features;
msg->size = sizeof(msg->payload.u64);
+ msg->fd_num = 0;
}
static int
@@ -1442,13 +1443,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
}
static int
-send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num)
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
if (!msg)
return 0;
return send_fd_message(sockfd, (char *)msg,
- VHOST_USER_HDR_SIZE + msg->size, fds, fd_num);
+ VHOST_USER_HDR_SIZE + msg->size, msg->fds, msg->fd_num);
}
static int
@@ -1462,19 +1463,18 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
msg->flags |= VHOST_USER_VERSION;
msg->flags |= VHOST_USER_REPLY_MASK;
- return send_vhost_message(sockfd, msg, NULL, 0);
+ return send_vhost_message(sockfd, msg);
}
static int
-send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg,
- int *fds, int fd_num)
+send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg)
{
int ret;
if (msg->flags & VHOST_USER_NEED_REPLY)
rte_spinlock_lock(&dev->slave_req_lock);
- ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num);
+ ret = send_vhost_message(dev->slave_req_fd, msg);
if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
rte_spinlock_unlock(&dev->slave_req_lock);
@@ -1659,6 +1659,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_FEATURES:
msg.payload.u64 = vhost_user_get_features(dev);
msg.size = sizeof(msg.payload.u64);
+ msg.fd_num = 0;
send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_FEATURES:
@@ -1690,6 +1691,7 @@ vhost_user_msg_handler(int vid, int fd)
goto skip_to_reply;
/* it needs a reply */
msg.size = sizeof(msg.payload.u64);
+ msg.fd_num = 0;
send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_LOG_FD:
@@ -1731,6 +1733,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_QUEUE_NUM:
msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev);
msg.size = sizeof(msg.payload.u64);
+ msg.fd_num = 0;
send_vhost_reply(fd, &msg);
break;
@@ -1778,6 +1781,7 @@ vhost_user_msg_handler(int vid, int fd)
if (msg.flags & VHOST_USER_NEED_REPLY) {
msg.payload.u64 = !!ret;
msg.size = sizeof(msg.payload.u64);
+ msg.fd_num = 0;
send_vhost_reply(fd, &msg);
} else if (ret) {
RTE_LOG(ERR, VHOST_CONFIG,
@@ -1861,7 +1865,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
},
};
- ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0);
+ ret = send_vhost_message(dev->slave_req_fd, &msg);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to send IOTLB miss message (%d)\n",
@@ -1877,8 +1881,6 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
uint64_t offset,
uint64_t size)
{
- int *fdp = NULL;
- size_t fd_num = 0;
int ret;
struct VhostUserMsg msg = {
.request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
@@ -1894,11 +1896,11 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
if (fd < 0)
msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
else {
- fdp = &fd;
- fd_num = 1;
+ msg.fds[0] = fd;
+ msg.fd_num = 1;
}
- ret = send_vhost_slave_message(dev, &msg, fdp, fd_num);
+ ret = send_vhost_slave_message(dev, &msg);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to set host notifier (%d)\n", ret);
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 04/12] vhost: introduce postcopy's advise message
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (2 preceding siblings ...)
2018-09-26 7:26 ` [dpdk-dev] [PATCH 03/12] vhost: enable fds passing when sending vhost-user messages Maxime Coquelin
@ 2018-09-26 7:26 ` Maxime Coquelin
2018-09-26 15:22 ` Alejandro Lucero
[not found] ` <CGME20180927082608eucas1p17cd1d99e54134fb2a6de3151e52048f3@eucas1p1.samsung.com>
2018-09-26 7:26 ` [dpdk-dev] [PATCH 05/12] vhost: add support for postcopy's listen message Maxime Coquelin
` (7 subsequent siblings)
11 siblings, 2 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:26 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
This patch opens a userfaultfd and sends it back to Qemu's
VHOST_USER_POSTCOPY_ADVISE request.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost.h | 2 ++
lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
lib/librte_vhost/vhost_user.h | 3 ++-
3 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 25ffd7614..21722d8a8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -363,6 +363,8 @@ struct virtio_net {
int slave_req_fd;
rte_spinlock_t slave_req_lock;
+ int postcopy_ufd;
+
/*
* Device id to identify a specific backend device.
* It's set to -1 for the default software implementation.
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index a9b429598..bdfe2cac0 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -24,9 +24,13 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <fcntl.h>
+#include <linux/userfaultfd.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <assert.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
@@ -69,6 +73,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
+ [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
};
static uint64_t
@@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
return 0;
}
+static int
+vhost_user_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+ struct uffdio_api api_struct;
+
+
+ dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+ if (dev->postcopy_ufd == -1) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ api_struct.api = UFFD_API;
+ api_struct.features = 0;
+ if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
+ RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+ strerror(errno));
+ close(dev->postcopy_ufd);
+ return -1;
+ }
+ msg->fds[0] = dev->postcopy_ufd;
+ msg->fd_num = 1;
+
+ return 0;
+}
+
/* return bytes# of read on success or negative val on failure. */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
ret = vhost_user_iotlb_msg(&dev, &msg);
break;
+ case VHOST_USER_POSTCOPY_ADVISE:
+ vhost_user_set_postcopy_advise(dev, &msg);
+ send_vhost_reply(fd, &msg);
+ break;
+
default:
ret = -1;
break;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index dd0262f8f..2030b40a5 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_CRYPTO_CREATE_SESS = 26,
VHOST_USER_CRYPTO_CLOSE_SESS = 27,
- VHOST_USER_MAX = 28
+ VHOST_USER_POSTCOPY_ADVISE = 28,
+ VHOST_USER_MAX = 29
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 04/12] vhost: introduce postcopy's advise message
2018-09-26 7:26 ` [dpdk-dev] [PATCH 04/12] vhost: introduce postcopy's advise message Maxime Coquelin
@ 2018-09-26 15:22 ` Alejandro Lucero
2018-09-27 9:35 ` Maxime Coquelin
[not found] ` <CGME20180927082608eucas1p17cd1d99e54134fb2a6de3151e52048f3@eucas1p1.samsung.com>
1 sibling, 1 reply; 23+ messages in thread
From: Alejandro Lucero @ 2018-09-26 15:22 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: dev, tiwei.bie, zhihong.wang, jfreimann, dgilbert
On Wed, Sep 26, 2018 at 8:27 AM Maxime Coquelin <maxime.coquelin@redhat.com>
wrote:
> This patch opens a userfaultfd and sends it back to Qemu's
> VHOST_USER_POSTCOPY_ADVISE request.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/librte_vhost/vhost.h | 2 ++
> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user.h | 3 ++-
> 3 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 25ffd7614..21722d8a8 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -363,6 +363,8 @@ struct virtio_net {
> int slave_req_fd;
> rte_spinlock_t slave_req_lock;
>
> + int postcopy_ufd;
> +
> /*
> * Device id to identify a specific backend device.
> * It's set to -1 for the default software implementation.
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index a9b429598..bdfe2cac0 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -24,9 +24,13 @@
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> +#include <fcntl.h>
> +#include <linux/userfaultfd.h>
> +#include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> +#include <sys/syscall.h>
> #include <assert.h>
> #ifdef RTE_LIBRTE_VHOST_NUMA
> #include <numaif.h>
> @@ -69,6 +73,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
> [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
> [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
> [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
> + [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
> };
>
> static uint64_t
> @@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
> struct VhostUserMsg *msg)
> return 0;
> }
>
> +static int
> +vhost_user_set_postcopy_advise(struct virtio_net *dev, struct
> VhostUserMsg *msg)
> +{
> + struct uffdio_api api_struct;
> +
> +
> + dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC |
> O_NONBLOCK);
> +
> + if (dev->postcopy_ufd == -1) {
> + RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available:
> %s\n",
> + strerror(errno));
> + return -1;
> + }
> + api_struct.api = UFFD_API;
> + api_struct.features = 0;
> + if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
> + RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure:
> %s\n",
> + strerror(errno));
> + close(dev->postcopy_ufd);
> + return -1;
> + }
> + msg->fds[0] = dev->postcopy_ufd;
> + msg->fd_num = 1;
> +
> + return 0;
> +}
> +
> /* return bytes# of read on success or negative val on failure. */
> static int
> read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> @@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
> ret = vhost_user_iotlb_msg(&dev, &msg);
> break;
>
> + case VHOST_USER_POSTCOPY_ADVISE:
> + vhost_user_set_postcopy_advise(dev, &msg);
> + send_vhost_reply(fd, &msg);
> + break;
> +
>
This should handle the case of vhost_user_set_postcopy_advise returning an
error. Otherwise the wrong msg contents are sent back.
> default:
> ret = -1;
> break;
> diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
> index dd0262f8f..2030b40a5 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
> VHOST_USER_IOTLB_MSG = 22,
> VHOST_USER_CRYPTO_CREATE_SESS = 26,
> VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> - VHOST_USER_MAX = 28
> + VHOST_USER_POSTCOPY_ADVISE = 28,
> + VHOST_USER_MAX = 29
> } VhostUserRequest;
>
> typedef enum VhostUserSlaveRequest {
> --
> 2.17.1
>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 04/12] vhost: introduce postcopy's advise message
2018-09-26 15:22 ` Alejandro Lucero
@ 2018-09-27 9:35 ` Maxime Coquelin
0 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-27 9:35 UTC (permalink / raw)
To: Alejandro Lucero; +Cc: dev, tiwei.bie, zhihong.wang, jfreimann, dgilbert
On 09/26/2018 05:22 PM, Alejandro Lucero wrote:
>
>
> On Wed, Sep 26, 2018 at 8:27 AM Maxime Coquelin
> <maxime.coquelin@redhat.com <mailto:maxime.coquelin@redhat.com>> wrote:
>
> This patch opens a userfaultfd and sends it back to Qemu's
> VHOST_USER_POSTCOPY_ADVISE request.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com
> <mailto:dgilbert@redhat.com>>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com
> <mailto:maxime.coquelin@redhat.com>>
> ---
> lib/librte_vhost/vhost.h | 2 ++
> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user.h | 3 ++-
> 3 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 25ffd7614..21722d8a8 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -363,6 +363,8 @@ struct virtio_net {
> int slave_req_fd;
> rte_spinlock_t slave_req_lock;
>
> + int postcopy_ufd;
> +
> /*
> * Device id to identify a specific backend device.
> * It's set to -1 for the default software implementation.
> diff --git a/lib/librte_vhost/vhost_user.c
> b/lib/librte_vhost/vhost_user.c
> index a9b429598..bdfe2cac0 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -24,9 +24,13 @@
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> +#include <fcntl.h>
> +#include <linux/userfaultfd.h>
> +#include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> +#include <sys/syscall.h>
> #include <assert.h>
> #ifdef RTE_LIBRTE_VHOST_NUMA
> #include <numaif.h>
> @@ -69,6 +73,7 @@ static const char
> *vhost_message_str[VHOST_USER_MAX] = {
> [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
> [VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> [VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> + [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
> };
>
> static uint64_t
> @@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net
> **pdev, struct VhostUserMsg *msg)
> return 0;
> }
>
> +static int
> +vhost_user_set_postcopy_advise(struct virtio_net *dev, struct
> VhostUserMsg *msg)
> +{
> + struct uffdio_api api_struct;
> +
> +
> + dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC |
> O_NONBLOCK);
> +
> + if (dev->postcopy_ufd == -1) {
> + RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not
> available: %s\n",
> + strerror(errno));
> + return -1;
> + }
> + api_struct.api = UFFD_API;
> + api_struct.features = 0;
> + if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
> + RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl
> failure: %s\n",
> + strerror(errno));
> + close(dev->postcopy_ufd);
> + return -1;
> + }
> + msg->fds[0] = dev->postcopy_ufd;
> + msg->fd_num = 1;
> +
> + return 0;
> +}
> +
> /* return bytes# of read on success or negative val on failure. */
> static int
> read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> @@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
> ret = vhost_user_iotlb_msg(&dev, &msg);
> break;
>
> + case VHOST_USER_POSTCOPY_ADVISE:
> + vhost_user_set_postcopy_advise(dev, &msg);
> + send_vhost_reply(fd, &msg);
> + break;
> +
>
>
> This should handle the case of vhost_user_set_postcopy_advise returning
> an error. Otherwise the msg wrong contents are sent back.
Right, I'll fix it when rebasing.
>
> default:
> ret = -1;
> break;
> diff --git a/lib/librte_vhost/vhost_user.h
> b/lib/librte_vhost/vhost_user.h
> index dd0262f8f..2030b40a5 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
> VHOST_USER_IOTLB_MSG = 22,
> VHOST_USER_CRYPTO_CREATE_SESS = 26,
> VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> - VHOST_USER_MAX = 28
> + VHOST_USER_POSTCOPY_ADVISE = 28,
> + VHOST_USER_MAX = 29
> } VhostUserRequest;
>
> typedef enum VhostUserSlaveRequest {
> --
> 2.17.1
>
Thanks,
Maxime
^ permalink raw reply [flat|nested] 23+ messages in thread
[parent not found: <CGME20180927082608eucas1p17cd1d99e54134fb2a6de3151e52048f3@eucas1p1.samsung.com>]
* Re: [dpdk-dev] [04/12] vhost: introduce postcopy's advise message
[not found] ` <CGME20180927082608eucas1p17cd1d99e54134fb2a6de3151e52048f3@eucas1p1.samsung.com>
@ 2018-09-27 8:28 ` Ilya Maximets
2018-09-28 10:40 ` Ilya Maximets
0 siblings, 1 reply; 23+ messages in thread
From: Ilya Maximets @ 2018-09-27 8:28 UTC (permalink / raw)
To: Maxime Coquelin, dev, tiwei.bie, zhihong.wang, jfreimann
Cc: dgilbert, Thomas Monjalon
On 26.09.2018 10:26, Maxime Coquelin wrote:
> This patch opens a userfaultfd and sends it back to Qemu's
> VHOST_USER_POSTCOPY_ADVISE request.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/librte_vhost/vhost.h | 2 ++
> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user.h | 3 ++-
> 3 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 25ffd7614..21722d8a8 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -363,6 +363,8 @@ struct virtio_net {
> int slave_req_fd;
> rte_spinlock_t slave_req_lock;
>
> + int postcopy_ufd;
> +
> /*
> * Device id to identify a specific backend device.
> * It's set to -1 for the default software implementation.
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index a9b429598..bdfe2cac0 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -24,9 +24,13 @@
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> +#include <fcntl.h>
> +#include <linux/userfaultfd.h>
Maybe we need a compile-time check for this header's existence?
Otherwise, this will bump the minimal kernel version for the default Linux build
to something like 4.3.
> +#include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> +#include <sys/syscall.h>
> #include <assert.h>
> #ifdef RTE_LIBRTE_VHOST_NUMA
> #include <numaif.h>
> @@ -69,6 +73,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
> [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
> [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
> [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
> + [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
> };
>
> static uint64_t
> @@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
> return 0;
> }
>
> +static int
> +vhost_user_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
> +{
> + struct uffdio_api api_struct;
> +
> +
> + dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
> +
> + if (dev->postcopy_ufd == -1) {
> + RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
> + strerror(errno));
> + return -1;
> + }
> + api_struct.api = UFFD_API;
> + api_struct.features = 0;
> + if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
> + RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
> + strerror(errno));
> + close(dev->postcopy_ufd);
> + return -1;
> + }
> + msg->fds[0] = dev->postcopy_ufd;
> + msg->fd_num = 1;
> +
> + return 0;
> +}
> +
> /* return bytes# of read on success or negative val on failure. */
> static int
> read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> @@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
> ret = vhost_user_iotlb_msg(&dev, &msg);
> break;
>
> + case VHOST_USER_POSTCOPY_ADVISE:
> + vhost_user_set_postcopy_advise(dev, &msg);
> + send_vhost_reply(fd, &msg);
> + break;
> +
> default:
> ret = -1;
> break;
> diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
> index dd0262f8f..2030b40a5 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
> VHOST_USER_IOTLB_MSG = 22,
> VHOST_USER_CRYPTO_CREATE_SESS = 26,
> VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> - VHOST_USER_MAX = 28
> + VHOST_USER_POSTCOPY_ADVISE = 28,
> + VHOST_USER_MAX = 29
> } VhostUserRequest;
>
> typedef enum VhostUserSlaveRequest {
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [04/12] vhost: introduce postcopy's advise message
2018-09-27 8:28 ` [dpdk-dev] [04/12] " Ilya Maximets
@ 2018-09-28 10:40 ` Ilya Maximets
2018-09-28 12:13 ` Bruce Richardson
0 siblings, 1 reply; 23+ messages in thread
From: Ilya Maximets @ 2018-09-28 10:40 UTC (permalink / raw)
To: Maxime Coquelin, dev, tiwei.bie, zhihong.wang, jfreimann
Cc: dgilbert, Thomas Monjalon
On 27.09.2018 11:28, Ilya Maximets wrote:
> On 26.09.2018 10:26, Maxime Coquelin wrote:
>> This patch opens a userfaultfd and sends it back to Qemu's
>> VHOST_USER_POSTCOPY_ADVISE request.
>>
>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>> lib/librte_vhost/vhost.h | 2 ++
>> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>> lib/librte_vhost/vhost_user.h | 3 ++-
>> 3 files changed, 41 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index 25ffd7614..21722d8a8 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -363,6 +363,8 @@ struct virtio_net {
>> int slave_req_fd;
>> rte_spinlock_t slave_req_lock;
>>
>> + int postcopy_ufd;
>> +
>> /*
>> * Device id to identify a specific backend device.
>> * It's set to -1 for the default software implementation.
>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>> index a9b429598..bdfe2cac0 100644
>> --- a/lib/librte_vhost/vhost_user.c
>> +++ b/lib/librte_vhost/vhost_user.c
>> @@ -24,9 +24,13 @@
>> #include <stdlib.h>
>> #include <string.h>
>> #include <unistd.h>
>> +#include <fcntl.h>
>> +#include <linux/userfaultfd.h>
>
> Maybe we need compile time check for this header existence?
> Otherwise, this will bump minimal kernel version for default linux build
> to something like 4.3.
We'll need a config option here (disabled by default) and guard all
the postcopy related code.
Meson build will be able to detect the header file and enable
the config if possible. Like this:
lib/librte_vhost/meson.build:
if cc.has_header('linux/userfaultfd.h')
dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
endif
Best regards, Ilya Maximets.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [04/12] vhost: introduce postcopy's advise message
2018-09-28 10:40 ` Ilya Maximets
@ 2018-09-28 12:13 ` Bruce Richardson
2018-09-28 13:17 ` Ilya Maximets
0 siblings, 1 reply; 23+ messages in thread
From: Bruce Richardson @ 2018-09-28 12:13 UTC (permalink / raw)
To: Ilya Maximets
Cc: Maxime Coquelin, dev, tiwei.bie, zhihong.wang, jfreimann,
dgilbert, Thomas Monjalon
On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
> On 27.09.2018 11:28, Ilya Maximets wrote:
> > On 26.09.2018 10:26, Maxime Coquelin wrote:
> >> This patch opens a userfaultfd and sends it back to Qemu's
> >> VHOST_USER_POSTCOPY_ADVISE request.
> >>
> >> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> ---
> >> lib/librte_vhost/vhost.h | 2 ++
> >> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> >> lib/librte_vhost/vhost_user.h | 3 ++-
> >> 3 files changed, 41 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> >> index 25ffd7614..21722d8a8 100644
> >> --- a/lib/librte_vhost/vhost.h
> >> +++ b/lib/librte_vhost/vhost.h
> >> @@ -363,6 +363,8 @@ struct virtio_net {
> >> int slave_req_fd;
> >> rte_spinlock_t slave_req_lock;
> >>
> >> + int postcopy_ufd;
> >> +
> >> /*
> >> * Device id to identify a specific backend device.
> >> * It's set to -1 for the default software implementation.
> >> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> >> index a9b429598..bdfe2cac0 100644
> >> --- a/lib/librte_vhost/vhost_user.c
> >> +++ b/lib/librte_vhost/vhost_user.c
> >> @@ -24,9 +24,13 @@
> >> #include <stdlib.h>
> >> #include <string.h>
> >> #include <unistd.h>
> >> +#include <fcntl.h>
> >> +#include <linux/userfaultfd.h>
> >
> > Maybe we need compile time check for this header existence?
> > Otherwise, this will bump minimal kernel version for default linux build
> > to something like 4.3.
>
> We'll need a config option here (disabled by default) and guard all
> the postcopy related code.
> Meson build will be able to detect the header file and enable
> the config if possible. Like this:
>
> lib/librte_vhost/meson.build:
> if cc.has_header('linux/userfaultfd.h')
> dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
Are you sure you want 'set10' rather than 'set'? Set is probably easier
because it ensures no define on false, while set10 has a define of 0. This
has caught me out before.
FYI, you can also avoid the if by putting the condition into the define:
dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))
/Bruce
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [04/12] vhost: introduce postcopy's advise message
2018-09-28 12:13 ` Bruce Richardson
@ 2018-09-28 13:17 ` Ilya Maximets
2018-09-28 13:24 ` Bruce Richardson
0 siblings, 1 reply; 23+ messages in thread
From: Ilya Maximets @ 2018-09-28 13:17 UTC (permalink / raw)
To: Bruce Richardson
Cc: Maxime Coquelin, dev, tiwei.bie, zhihong.wang, jfreimann,
dgilbert, Thomas Monjalon
On 28.09.2018 15:13, Bruce Richardson wrote:
> On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
>> On 27.09.2018 11:28, Ilya Maximets wrote:
>>> On 26.09.2018 10:26, Maxime Coquelin wrote:
>>>> This patch opens a userfaultfd and sends it back to Qemu's
>>>> VHOST_USER_POSTCOPY_ADVISE request.
>>>>
>>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>> lib/librte_vhost/vhost.h | 2 ++
>>>> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>>>> lib/librte_vhost/vhost_user.h | 3 ++-
>>>> 3 files changed, 41 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>>> index 25ffd7614..21722d8a8 100644
>>>> --- a/lib/librte_vhost/vhost.h
>>>> +++ b/lib/librte_vhost/vhost.h
>>>> @@ -363,6 +363,8 @@ struct virtio_net {
>>>> int slave_req_fd;
>>>> rte_spinlock_t slave_req_lock;
>>>>
>>>> + int postcopy_ufd;
>>>> +
>>>> /*
>>>> * Device id to identify a specific backend device.
>>>> * It's set to -1 for the default software implementation.
>>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>>>> index a9b429598..bdfe2cac0 100644
>>>> --- a/lib/librte_vhost/vhost_user.c
>>>> +++ b/lib/librte_vhost/vhost_user.c
>>>> @@ -24,9 +24,13 @@
>>>> #include <stdlib.h>
>>>> #include <string.h>
>>>> #include <unistd.h>
>>>> +#include <fcntl.h>
>>>> +#include <linux/userfaultfd.h>
>>>
>>> Maybe we need compile time check for this header existence?
>>> Otherwise, this will bump minimal kernel version for default linux build
>>> to something like 4.3.
>>
>> We'll need a config option here (disabled by default) and guard all
>> the postcopy related code.
>> Meson build will be able to detect the header file and enable
>> the config if possible. Like this:
>>
>> lib/librte_vhost/meson.build:
>> if cc.has_header('linux/userfaultfd.h')
>> dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
>
> Are you sure you want 'set10' rather than 'set'. Set is probably easier
> because it ensures no define on false, while set10 has a define of 0. This
> has caught me out before.
>
> FYI, you can also avoid the if by putting the condition into the define:
>
> dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))
Sure, this variant looks better. Thanks for suggestions.
I just copied my version from the similar code for 'RTE_HAS_LIBNUMA'.
Best regards, Ilya Maximets.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [04/12] vhost: introduce postcopy's advise message
2018-09-28 13:17 ` Ilya Maximets
@ 2018-09-28 13:24 ` Bruce Richardson
2018-09-28 13:49 ` Maxime Coquelin
0 siblings, 1 reply; 23+ messages in thread
From: Bruce Richardson @ 2018-09-28 13:24 UTC (permalink / raw)
To: Ilya Maximets
Cc: Maxime Coquelin, dev, tiwei.bie, zhihong.wang, jfreimann,
dgilbert, Thomas Monjalon
On Fri, Sep 28, 2018 at 04:17:34PM +0300, Ilya Maximets wrote:
> On 28.09.2018 15:13, Bruce Richardson wrote:
> > On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
> >> On 27.09.2018 11:28, Ilya Maximets wrote:
> >>> On 26.09.2018 10:26, Maxime Coquelin wrote:
> >>>> This patch opens a userfaultfd and sends it back to Qemu's
> >>>> VHOST_USER_POSTCOPY_ADVISE request.
> >>>>
> >>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >>>> ---
> >>>> lib/librte_vhost/vhost.h | 2 ++
> >>>> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> >>>> lib/librte_vhost/vhost_user.h | 3 ++-
> >>>> 3 files changed, 41 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> >>>> index 25ffd7614..21722d8a8 100644
> >>>> --- a/lib/librte_vhost/vhost.h
> >>>> +++ b/lib/librte_vhost/vhost.h
> >>>> @@ -363,6 +363,8 @@ struct virtio_net {
> >>>> int slave_req_fd;
> >>>> rte_spinlock_t slave_req_lock;
> >>>>
> >>>> + int postcopy_ufd;
> >>>> +
> >>>> /*
> >>>> * Device id to identify a specific backend device.
> >>>> * It's set to -1 for the default software implementation.
> >>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> >>>> index a9b429598..bdfe2cac0 100644
> >>>> --- a/lib/librte_vhost/vhost_user.c
> >>>> +++ b/lib/librte_vhost/vhost_user.c
> >>>> @@ -24,9 +24,13 @@
> >>>> #include <stdlib.h>
> >>>> #include <string.h>
> >>>> #include <unistd.h>
> >>>> +#include <fcntl.h>
> >>>> +#include <linux/userfaultfd.h>
> >>>
> >>> Maybe we need compile time check for this header existence?
> >>> Otherwise, this will bump minimal kernel version for default linux build
> >>> to something like 4.3.
> >>
> >> We'll need a config option here (disabled by default) and guard all
> >> the postcopy related code.
> >> Meson build will be able to detect the header file and enable
> >> the config if possible. Like this:
> >>
> >> lib/librte_vhost/meson.build:
> >> if cc.has_header('linux/userfaultfd.h')
> >> dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
> >
> > Are you sure you want 'set10' rather than 'set'. Set is probably easier
> > because it ensures no define on false, while set10 has a define of 0. This
> > has caught me out before.
> >
> > FYI, you can also avoid the if by putting the condition into the define:
> >
> > dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))
>
> Sure, this variant looks better. Thanks for suggestions.
> I just copied my version from the similar code for 'RTE_HAS_LIBNUMA'.
>
Yes, looking at that code, it could do with a clean-up to shorten it too.
[It's true that nothing embarrasses a programmer more than their own code 6
months later :-)]
/Bruce
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [04/12] vhost: introduce postcopy's advise message
2018-09-28 13:24 ` Bruce Richardson
@ 2018-09-28 13:49 ` Maxime Coquelin
0 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-28 13:49 UTC (permalink / raw)
To: Bruce Richardson, Ilya Maximets
Cc: dev, tiwei.bie, zhihong.wang, jfreimann, dgilbert, Thomas Monjalon
On 09/28/2018 03:24 PM, Bruce Richardson wrote:
> On Fri, Sep 28, 2018 at 04:17:34PM +0300, Ilya Maximets wrote:
>> On 28.09.2018 15:13, Bruce Richardson wrote:
>>> On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
>>>> On 27.09.2018 11:28, Ilya Maximets wrote:
>>>>> On 26.09.2018 10:26, Maxime Coquelin wrote:
>>>>>> This patch opens a userfaultfd and sends it back to Qemu's
>>>>>> VHOST_USER_POSTCOPY_ADVISE request.
>>>>>>
>>>>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>>>> ---
>>>>>> lib/librte_vhost/vhost.h | 2 ++
>>>>>> lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>>>>>> lib/librte_vhost/vhost_user.h | 3 ++-
>>>>>> 3 files changed, 41 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>>>>> index 25ffd7614..21722d8a8 100644
>>>>>> --- a/lib/librte_vhost/vhost.h
>>>>>> +++ b/lib/librte_vhost/vhost.h
>>>>>> @@ -363,6 +363,8 @@ struct virtio_net {
>>>>>> int slave_req_fd;
>>>>>> rte_spinlock_t slave_req_lock;
>>>>>>
>>>>>> + int postcopy_ufd;
>>>>>> +
>>>>>> /*
>>>>>> * Device id to identify a specific backend device.
>>>>>> * It's set to -1 for the default software implementation.
>>>>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>>>>>> index a9b429598..bdfe2cac0 100644
>>>>>> --- a/lib/librte_vhost/vhost_user.c
>>>>>> +++ b/lib/librte_vhost/vhost_user.c
>>>>>> @@ -24,9 +24,13 @@
>>>>>> #include <stdlib.h>
>>>>>> #include <string.h>
>>>>>> #include <unistd.h>
>>>>>> +#include <fcntl.h>
>>>>>> +#include <linux/userfaultfd.h>
>>>>>
>>>>> Maybe we need compile time check for this header existence?
>>>>> Otherwise, this will bump minimal kernel version for default linux build
>>>>> to something like 4.3.
>>>>
>>>> We'll need a config option here (disabled by default) and guard all
>>>> the postcopy related code.
>>>> Meson build will be able to detect the header file and enable
>>>> the config if possible. Like this:
>>>>
>>>> lib/librte_vhost/meson.build:
>>>> if cc.has_header('linux/userfaultfd.h')
>>>> dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
>>>
>>> Are you sure you want 'set10' rather than 'set'. Set is probably easier
>>> because it ensures no define on false, while set10 has a define of 0. This
>>> has caught me out before.
>>>
>>> FYI, you can also avoid the if by putting the condition into the define:
>>>
>>> dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))
>>
>> Sure, this variant looks better. Thanks for suggestions.
>> I just copied my version from the similar code for 'RTE_HAS_LIBNUMA'.
>>
Thanks Ilya & Bruce for the hint!
I'll do this in next version.
> Yes, looking at that code, it could do with a clean-up to shorten it too.
> [It's true that nothing embarasses a programmer more than their own code 6
> months layer :-)]
:)
Maxime
> /Bruce
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 05/12] vhost: add support for postcopy's listen message
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (3 preceding siblings ...)
2018-09-26 7:26 ` [dpdk-dev] [PATCH 04/12] vhost: introduce postcopy's advise message Maxime Coquelin
@ 2018-09-26 7:26 ` Maxime Coquelin
2018-09-26 7:26 ` [dpdk-dev] [PATCH 06/12] vhost: register new regions with userfaultfd Maxime Coquelin
` (6 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:26 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost.h | 1 +
lib/librte_vhost/vhost_user.c | 18 ++++++++++++++++++
lib/librte_vhost/vhost_user.h | 4 +++-
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 21722d8a8..9453cb28d 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -364,6 +364,7 @@ struct virtio_net {
rte_spinlock_t slave_req_lock;
int postcopy_ufd;
+ int postcopy_listening;
/*
* Device id to identify a specific backend device.
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bdfe2cac0..d9ef5bca8 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -74,6 +74,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
[VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
+ [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN",
};
static uint64_t
@@ -1444,6 +1445,19 @@ vhost_user_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
return 0;
}
+static int
+vhost_user_set_postcopy_listen(struct virtio_net *dev)
+{
+ if (dev->mem && dev->mem->nregions) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Regions already registered at postcopy-listen\n");
+ return -1;
+ }
+ dev->postcopy_listening = 1;
+
+ return 0;
+}
+
/* return bytes# of read on success or negative val on failure. */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1793,6 +1807,10 @@ vhost_user_msg_handler(int vid, int fd)
send_vhost_reply(fd, &msg);
break;
+ case VHOST_USER_POSTCOPY_LISTEN:
+ ret = vhost_user_set_postcopy_listen(dev);
+ break;
+
default:
ret = -1;
break;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 2030b40a5..73b1fe2b9 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -51,7 +51,9 @@ typedef enum VhostUserRequest {
VHOST_USER_CRYPTO_CREATE_SESS = 26,
VHOST_USER_CRYPTO_CLOSE_SESS = 27,
VHOST_USER_POSTCOPY_ADVISE = 28,
- VHOST_USER_MAX = 29
+ VHOST_USER_POSTCOPY_LISTEN = 29,
+ VHOST_USER_POSTCOPY_END = 30,
+ VHOST_USER_MAX = 31
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 06/12] vhost: register new regions with userfaultfd
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (4 preceding siblings ...)
2018-09-26 7:26 ` [dpdk-dev] [PATCH 05/12] vhost: add support for postcopy's listen message Maxime Coquelin
@ 2018-09-26 7:26 ` Maxime Coquelin
2018-09-26 15:31 ` Alejandro Lucero
2018-09-26 7:27 ` [dpdk-dev] [PATCH 07/12] vhost: avoid useless VhostUserMemory copy Maxime Coquelin
` (5 subsequent siblings)
11 siblings, 1 reply; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:26 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost_user.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index d9ef5bca8..cbbfbdf00 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -928,6 +928,28 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
mmap_size,
alignment,
mmap_offset);
+
+ if (dev->postcopy_listening) {
+ struct uffdio_register reg_struct;
+
+ reg_struct.range.start = (uint64_t)(uintptr_t)mmap_addr;
+ reg_struct.range.len = mmap_size;
+ reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+
+ if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
+ &reg_struct)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to register ufd for region %d: (ufd = %d) %s\n",
+ i, dev->postcopy_ufd,
+ strerror(errno));
+ continue;
+ }
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "\t userfaultfd registered for range : %llx - %llx\n",
+ reg_struct.range.start,
+ reg_struct.range.start +
+ reg_struct.range.len - 1);
+ }
}
for (i = 0; i < dev->nr_vring; i++) {
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 06/12] vhost: register new regions with userfaultfd
2018-09-26 7:26 ` [dpdk-dev] [PATCH 06/12] vhost: register new regions with userfaultfd Maxime Coquelin
@ 2018-09-26 15:31 ` Alejandro Lucero
2018-09-27 9:37 ` Maxime Coquelin
0 siblings, 1 reply; 23+ messages in thread
From: Alejandro Lucero @ 2018-09-26 15:31 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: dev, tiwei.bie, zhihong.wang, jfreimann, dgilbert
On Wed, Sep 26, 2018 at 8:28 AM Maxime Coquelin <maxime.coquelin@redhat.com>
wrote:
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/librte_vhost/vhost_user.c | 22 ++++++++++++++++++++++
> 1 file changed, 22 insertions(+)
>
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index d9ef5bca8..cbbfbdf00 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -928,6 +928,28 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *pmsg)
> mmap_size,
> alignment,
> mmap_offset);
> +
> + if (dev->postcopy_listening) {
> + struct uffdio_register reg_struct;
> +
> + reg_struct.range.start =
> (uint64_t)(uintptr_t)mmap_addr;
> + reg_struct.range.len = mmap_size;
> + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
> +
> + if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
> + &reg_struct)) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to register ufd
> for region %d: (ufd = %d) %s\n",
> + i, dev->postcopy_ufd,
> + strerror(errno));
> + continue;
>
Isn't a registration error a serious problem for supporting migration
properly?
> + }
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "\t userfaultfd registered for
> range : %llx - %llx\n",
> + reg_struct.range.start,
> + reg_struct.range.start +
> + reg_struct.range.len - 1);
> + }
> }
>
> for (i = 0; i < dev->nr_vring; i++) {
> --
> 2.17.1
>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [dpdk-dev] [PATCH 06/12] vhost: register new regions with userfaultfd
2018-09-26 15:31 ` Alejandro Lucero
@ 2018-09-27 9:37 ` Maxime Coquelin
0 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-27 9:37 UTC (permalink / raw)
To: Alejandro Lucero; +Cc: dev, tiwei.bie, zhihong.wang, jfreimann, dgilbert
On 09/26/2018 05:31 PM, Alejandro Lucero wrote:
>
>
> On Wed, Sep 26, 2018 at 8:28 AM Maxime Coquelin
> <maxime.coquelin@redhat.com <mailto:maxime.coquelin@redhat.com>> wrote:
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com
> <mailto:dgilbert@redhat.com>>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com
> <mailto:maxime.coquelin@redhat.com>>
> ---
> lib/librte_vhost/vhost_user.c | 22 ++++++++++++++++++++++
> 1 file changed, 22 insertions(+)
>
> diff --git a/lib/librte_vhost/vhost_user.c
> b/lib/librte_vhost/vhost_user.c
> index d9ef5bca8..cbbfbdf00 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -928,6 +928,28 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *pmsg)
> mmap_size,
> alignment,
> mmap_offset);
> +
> + if (dev->postcopy_listening) {
> + struct uffdio_register reg_struct;
> +
> + reg_struct.range.start =
> (uint64_t)(uintptr_t)mmap_addr;
> + reg_struct.range.len = mmap_size;
> + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
> +
> + if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
> + &reg_struct)) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to register
> ufd for region %d: (ufd = %d) %s\n",
> + i, dev->postcopy_ufd,
> + strerror(errno));
> + continue;
>
>
> is it not a registration error a serious problem for supporting
> migration properly?
Yes it is, I forgot to fix that.
Thanks for reporting it.
Maxime
> + }
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "\t userfaultfd registered
> for range : %llx - %llx\n",
> + reg_struct.range.start,
> + reg_struct.range.start +
> + reg_struct.range.len - 1);
> + }
> }
>
> for (i = 0; i < dev->nr_vring; i++) {
> --
> 2.17.1
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 07/12] vhost: avoid useless VhostUserMemory copy
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (5 preceding siblings ...)
2018-09-26 7:26 ` [dpdk-dev] [PATCH 06/12] vhost: register new regions with userfaultfd Maxime Coquelin
@ 2018-09-26 7:27 ` Maxime Coquelin
2018-09-26 7:27 ` [dpdk-dev] [PATCH 08/12] vhost: send userfault range addresses back to qemu Maxime Coquelin
` (4 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:27 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
The VHOST_USER_SET_MEM_TABLE payload is copied when handled,
whereas it could directly be referenced.
This is not very important, but next, we'll need to update the
payload and send it back to Qemu.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost_user.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index cbbfbdf00..37b5edcd7 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -789,7 +789,7 @@ static int
vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
{
struct virtio_net *dev = *pdev;
- struct VhostUserMemory memory = pmsg->payload.memory;
+ struct VhostUserMemory *memory = &pmsg->payload.memory;
struct rte_vhost_mem_region *reg;
void *mmap_addr;
uint64_t mmap_size;
@@ -799,17 +799,17 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
int populate;
int fd;
- if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
+ if (memory->nregions > VHOST_MEMORY_MAX_NREGIONS) {
RTE_LOG(ERR, VHOST_CONFIG,
- "too many memory regions (%u)\n", memory.nregions);
+ "too many memory regions (%u)\n", memory->nregions);
return -1;
}
- if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) {
+ if (dev->mem && !vhost_memory_changed(memory, dev->mem)) {
RTE_LOG(INFO, VHOST_CONFIG,
"(%d) memory regions not changed\n", dev->vid);
- for (i = 0; i < memory.nregions; i++)
+ for (i = 0; i < memory->nregions; i++)
close(pmsg->fds[i]);
return 0;
@@ -841,25 +841,25 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
}
dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
- sizeof(struct rte_vhost_mem_region) * memory.nregions, 0);
+ sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
if (dev->mem == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to allocate memory for dev->mem\n",
dev->vid);
return -1;
}
- dev->mem->nregions = memory.nregions;
+ dev->mem->nregions = memory->nregions;
- for (i = 0; i < memory.nregions; i++) {
+ for (i = 0; i < memory->nregions; i++) {
fd = pmsg->fds[i];
reg = &dev->mem->regions[i];
- reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
- reg->guest_user_addr = memory.regions[i].userspace_addr;
- reg->size = memory.regions[i].memory_size;
+ reg->guest_phys_addr = memory->regions[i].guest_phys_addr;
+ reg->guest_user_addr = memory->regions[i].userspace_addr;
+ reg->size = memory->regions[i].memory_size;
reg->fd = fd;
- mmap_offset = memory.regions[i].mmap_offset;
+ mmap_offset = memory->regions[i].mmap_offset;
/* Check for memory_size + mmap_offset overflow */
if (mmap_offset >= -reg->size) {
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 08/12] vhost: send userfault range addresses back to qemu
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (6 preceding siblings ...)
2018-09-26 7:27 ` [dpdk-dev] [PATCH 07/12] vhost: avoid useless VhostUserMemory copy Maxime Coquelin
@ 2018-09-26 7:27 ` Maxime Coquelin
2018-09-26 7:27 ` [dpdk-dev] [PATCH 09/12] vhost: add support to postcopy's end request Maxime Coquelin
` (3 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:27 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost_user.c | 52 ++++++++++++++++++++++++++++++++---
1 file changed, 48 insertions(+), 4 deletions(-)
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 37b5edcd7..766df7a58 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -77,6 +77,11 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN",
};
+static int
+send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg);
+
static uint64_t
get_blk_size(int fd)
{
@@ -786,7 +791,8 @@ vhost_memory_changed(struct VhostUserMemory *new,
}
static int
-vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
+vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg,
+ int main_fd)
{
struct virtio_net *dev = *pdev;
struct VhostUserMemory *memory = &pmsg->payload.memory;
@@ -930,10 +936,48 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
mmap_offset);
if (dev->postcopy_listening) {
+ /*
+ * We don't have a better way right now than sharing
+ * DPDK's virtual address with Qemu, so that Qemu can
+ * retrieve the region offset when handling userfaults.
+ */
+ memory->regions[i].userspace_addr =
+ reg->host_user_addr;
+ }
+ }
+ if (dev->postcopy_listening) {
+ /* Send the addresses back to qemu */
+ pmsg->fd_num = 0;
+ send_vhost_reply(main_fd, pmsg);
+
+ /* Wait for qemu to acknowledge it's got the addresses
+ * we've got to wait before we're allowed to generate faults.
+ */
+ VhostUserMsg ack_msg;
+ if (read_vhost_message(main_fd, &ack_msg) <= 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to read qemu ack on postcopy set-mem-table\n");
+ goto err_mmap;
+ }
+ if (ack_msg.request.master != VHOST_USER_SET_MEM_TABLE) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Bad qemu ack on postcopy set-mem-table (%d)\n",
+ ack_msg.request.master);
+ goto err_mmap;
+ }
+
+ /* Now userfault register and we can use the memory */
+ for (i = 0; i < memory->nregions; i++) {
+ reg = &dev->mem->regions[i];
struct uffdio_register reg_struct;
- reg_struct.range.start = (uint64_t)(uintptr_t)mmap_addr;
- reg_struct.range.len = mmap_size;
+ /*
+ * Let's register all the mmap'ed area to ensure
+ * alignment on page boundary.
+ */
+ reg_struct.range.start =
+ (uint64_t)(uintptr_t)reg->mmap_addr;
+ reg_struct.range.len = reg->mmap_size;
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
@@ -1750,7 +1794,7 @@ vhost_user_msg_handler(int vid, int fd)
break;
case VHOST_USER_SET_MEM_TABLE:
- ret = vhost_user_set_mem_table(&dev, &msg);
+ ret = vhost_user_set_mem_table(&dev, &msg, fd);
break;
case VHOST_USER_SET_LOG_BASE:
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 09/12] vhost: add support to postcopy's end request
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (7 preceding siblings ...)
2018-09-26 7:27 ` [dpdk-dev] [PATCH 08/12] vhost: send userfault range addresses back to qemu Maxime Coquelin
@ 2018-09-26 7:27 ` Maxime Coquelin
2018-09-26 7:27 ` [dpdk-dev] [PATCH 10/12] vhost: enable postcopy protocol feature Maxime Coquelin
` (2 subsequent siblings)
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:27 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
The master sends this message before stopping handling
userfaults, so that the backend closes the userfaultfd.
The master waits for the slave to acknowledge the request
with an empty 64-bit payload for synchronization purposes.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost_user.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 766df7a58..83b080610 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -75,6 +75,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
[VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
[VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN",
+ [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END",
};
static int
@@ -1524,6 +1525,18 @@ vhost_user_set_postcopy_listen(struct virtio_net *dev)
return 0;
}
+static int
+vhost_user_postcopy_end(struct virtio_net *dev)
+{
+ dev->postcopy_listening = 0;
+ if (dev->postcopy_ufd > 0) {
+ close(dev->postcopy_ufd);
+ dev->postcopy_ufd = -1;
+ }
+
+ return 0;
+}
+
/* return bytes# of read on success or negative val on failure. */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1877,6 +1890,14 @@ vhost_user_msg_handler(int vid, int fd)
ret = vhost_user_set_postcopy_listen(dev);
break;
+ case VHOST_USER_POSTCOPY_END:
+ vhost_user_postcopy_end(dev);
+ msg.payload.u64 = 0;
+ msg.size = sizeof(msg.payload.u64);
+ msg.fd_num = 0;
+ send_vhost_reply(fd, &msg);
+ break;
+
default:
ret = -1;
break;
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 10/12] vhost: enable postcopy protocol feature
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (8 preceding siblings ...)
2018-09-26 7:27 ` [dpdk-dev] [PATCH 09/12] vhost: add support to postcopy's end request Maxime Coquelin
@ 2018-09-26 7:27 ` Maxime Coquelin
2018-09-26 7:27 ` [dpdk-dev] [PATCH 11/12] vhost: add flag to enable postcopy live-migration Maxime Coquelin
2018-09-26 7:27 ` [dpdk-dev] [PATCH 12/12] net/vhost: add parameter to enable postcopy migration support Maxime Coquelin
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:27 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Enable the postcopy protocol feature unless dequeue
zero-copy is enabled. In that case, guest memory needs
to be populated, which is not compatible with userfaultfd.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost_user.c | 7 +++++++
lib/librte_vhost/vhost_user.h | 3 ++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 83b080610..ef4e4e370 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1242,6 +1242,13 @@ vhost_user_get_protocol_features(struct virtio_net *dev,
if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK);
+ /*
+ * If dequeue zerocopy is enabled, guest memory requires to be
+ * populated, which is not compatible with postcopy.
+ */
+ if (dev->dequeue_zero_copy)
+ protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
+
msg->payload.u64 = protocol_features;
msg->size = sizeof(msg->payload.u64);
msg->fd_num = 0;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 73b1fe2b9..dc97be843 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -22,7 +22,8 @@
(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
- (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 11/12] vhost: add flag to enable postcopy live-migration
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (9 preceding siblings ...)
2018-09-26 7:27 ` [dpdk-dev] [PATCH 10/12] vhost: enable postcopy protocol feature Maxime Coquelin
@ 2018-09-26 7:27 ` Maxime Coquelin
2018-09-26 7:27 ` [dpdk-dev] [PATCH 12/12] net/vhost: add parameter to enable postcopy migration support Maxime Coquelin
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:27 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
The postcopy live-migration feature requires the application
not to populate the guest memory. As the vhost library cannot
prevent the application from doing so (e.g. by preventing the
application from calling mlockall()), the feature is disabled
by default.
The application should only enable the feature if it does not
force the guest memory to be populated.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
doc/guides/prog_guide/vhost_lib.rst | 8 ++++++++
lib/librte_vhost/rte_vhost.h | 1 +
lib/librte_vhost/socket.c | 11 +++++++++--
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index 77af4d775..c77df338f 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -106,6 +106,14 @@ The following is an overview of some key Vhost API functions:
Enabling this flag with these Qemu version results in Qemu being blocked
when multiple queue pairs are declared.
+ - ``RTE_VHOST_USER_POSTCOPY_SUPPORT``
+
+ Postcopy live-migration support will be enabled when this flag is set.
+ It is disabled by default.
+
+ Enabling this flag should only be done when the calling application does
+ not pre-fault the guest shared memory, otherwise migration would fail.
+
* ``rte_vhost_driver_set_features(path, features)``
This function sets the feature bits the vhost-user driver supports. The
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index b3cc6990d..b26afbffa 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -28,6 +28,7 @@ extern "C" {
#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
#define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
+#define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4)
/** Protocol features. */
#ifndef VHOST_USER_PROTOCOL_F_MQ
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index c04d3d305..a7beeeda4 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -51,6 +51,8 @@ struct vhost_user_socket {
uint64_t supported_features;
uint64_t features;
+ uint64_t protocol_features;
+
/*
* Device id to identify a specific backend device.
* It's set to -1 for the default software implementation.
@@ -731,7 +733,7 @@ rte_vhost_driver_get_protocol_features(const char *path,
did = vsocket->vdpa_dev_id;
vdpa_dev = rte_vdpa_get_device(did);
if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
- *protocol_features = VHOST_USER_PROTOCOL_FEATURES;
+ *protocol_features = vsocket->protocol_features;
goto unlock_exit;
}
@@ -744,7 +746,7 @@ rte_vhost_driver_get_protocol_features(const char *path,
goto unlock_exit;
}
- *protocol_features = VHOST_USER_PROTOCOL_FEATURES
+ *protocol_features = vsocket->protocol_features
& vdpa_protocol_features;
unlock_exit:
@@ -863,6 +865,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->use_builtin_virtio_net = true;
vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
+ vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;
/* Dequeue zero copy can't assure descriptors returned in order */
if (vsocket->dequeue_zero_copy) {
@@ -875,6 +878,10 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
}
+ if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))
+ vsocket->protocol_features &=
+ ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
+
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
if (vsocket->reconnect && reconn_tid == 0) {
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* [dpdk-dev] [PATCH 12/12] net/vhost: add parameter to enable postcopy migration support
2018-09-26 7:26 [dpdk-dev] [PATCH 00/12] vhost: add postcopy live-migration support Maxime Coquelin
` (10 preceding siblings ...)
2018-09-26 7:27 ` [dpdk-dev] [PATCH 11/12] vhost: add flag to enable postcopy live-migration Maxime Coquelin
@ 2018-09-26 7:27 ` Maxime Coquelin
11 siblings, 0 replies; 23+ messages in thread
From: Maxime Coquelin @ 2018-09-26 7:27 UTC (permalink / raw)
To: dev, tiwei.bie, zhihong.wang, jfreimann; +Cc: dgilbert, Maxime Coquelin
Introduce a new postcopy-support parameter to Vhost PMD that
passes the RTE_VHOST_USER_POSTCOPY_SUPPORT flag at vhost
device register time.
The flag should only be set if the application does not prefault
guest memory using, for example, the mlockall() syscall.
Default value is 0, meaning that postcopy support is disabled
unless specified explicitly.
Example to enable postcopy support for a given device:
--vdev 'net_vhost0,iface=/tmp/vhost-user1,postcopy-support=1'
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
doc/guides/nics/vhost.rst | 5 +++++
drivers/net/vhost/rte_eth_vhost.c | 13 +++++++++++++
2 files changed, 18 insertions(+)
diff --git a/doc/guides/nics/vhost.rst b/doc/guides/nics/vhost.rst
index 4f7ae8990..23f2e87aa 100644
--- a/doc/guides/nics/vhost.rst
+++ b/doc/guides/nics/vhost.rst
@@ -71,6 +71,11 @@ The user can specify below arguments in `--vdev` option.
It is used to enable iommu support in vhost library.
(Default: 0 (disabled))
+#. ``postcopy-support``:
+
+ It is used to enable postcopy live-migration support in vhost library.
+ (Default: 0 (disabled))
+
Vhost PMD event handling
------------------------
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index aa6052221..1330f06ba 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -30,6 +30,7 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
#define ETH_VHOST_CLIENT_ARG "client"
#define ETH_VHOST_DEQUEUE_ZERO_COPY "dequeue-zero-copy"
#define ETH_VHOST_IOMMU_SUPPORT "iommu-support"
+#define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support"
#define VHOST_MAX_PKT_BURST 32
static const char *valid_arguments[] = {
@@ -38,6 +39,7 @@ static const char *valid_arguments[] = {
ETH_VHOST_CLIENT_ARG,
ETH_VHOST_DEQUEUE_ZERO_COPY,
ETH_VHOST_IOMMU_SUPPORT,
+ ETH_VHOST_POSTCOPY_SUPPORT,
NULL
};
@@ -1339,6 +1341,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
int client_mode = 0;
int dequeue_zero_copy = 0;
int iommu_support = 0;
+ int postcopy_support = 0;
struct rte_eth_dev *eth_dev;
const char *name = rte_vdev_device_name(dev);
@@ -1411,6 +1414,16 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
}
+ if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
+ &open_int, &postcopy_support);
+ if (ret < 0)
+ goto out_free;
+
+ if (postcopy_support)
+ flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
+ }
+
if (dev->device.numa_node == SOCKET_ID_ANY)
dev->device.numa_node = rte_socket_id();
--
2.17.1
^ permalink raw reply [flat|nested] 23+ messages in thread