* [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
@ 2016-05-07 6:40 Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
` (7 more replies)
0 siblings, 8 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
Both the vhost-user backend (DPDK here) and the frontend (QEMU) can act as
either server or client. So far, DPDK acts only as the server. This patch
set makes it possible for DPDK to act as both.
A new argument (flags) is introduced for the rte_vhost_driver_register()
API, and client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note
that this implies an API breakage. However, since this release deals with
ABI/API refactoring anyway, it should not be an issue.
With DPDK as the client, it's easier to implement the "reconnect" ability,
which means we can still make vhost-user work after the DPDK application
restarts.
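For illustration, here is a minimal usage sketch of the new API (the socket
path is just an example, and RTE_VHOST_USER_RECONNECT is only introduced
later in this series):
    #include <rte_virtio_net.h>

    static int
    register_vhost_client(void)
    {
            /* act as vhost-user client and keep reconnecting on disconnect */
            uint64_t flags = RTE_VHOST_USER_CLIENT | RTE_VHOST_USER_RECONNECT;

            /* "/tmp/vhost-user.sock" is an example path only */
            return rte_vhost_driver_register("/tmp/vhost-user.sock", flags);
    }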
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client and reconnect option
vhost: add pmd client and reconnect option
drivers/net/vhost/rte_eth_vhost.c | 54 +++-
examples/vhost/main.c | 23 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 355 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 8 +
6 files changed, 313 insertions(+), 145 deletions(-)
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH 1/6] vhost: rename structs for enabling client mode
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
@ 2016-05-07 6:40 ` Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 2/6] vhost: add vhost-user " Yuanhan Liu
` (6 subsequent siblings)
7 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
So far, DPDK vhost-user acts only as a server, so a struct named
"vhost_server" is okay. However, once we add support for DPDK vhost-user
acting as a client, that name no longer makes sense. Rename it to
"vhost_user_socket".
There was nothing really wrong with "connfd_ctx", but it's clearly better
to rename it to "vhost_user_connection", as it does represent a connection:
a connection between the backend (DPDK) and the frontend (QEMU).
Similarly, a few more renames are made, such as "vserver_new_vq_conn"
to "vhost_user_new_connection".
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
lib/librte_vhost/vhost_user/vhost-net-user.c | 131 ++++++++++++++-------------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 --
2 files changed, 70 insertions(+), 67 deletions(-)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 68fc9b9..f485a3b 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -51,32 +51,41 @@
#include "vhost-net.h"
#include "virtio-net-user.h"
-#define MAX_VIRTIO_BACKLOG 128
-
-static void vserver_new_vq_conn(int fd, void *data, int *remove);
-static void vserver_message_handler(int fd, void *dat, int *remove);
+/*
+ * Every time rte_vhost_driver_register() is invoked, an associated
+ * vhost_user_socket struct will be created.
+ */
+struct vhost_user_socket {
+ char *path;
+ int listenfd;
+};
-struct connfd_ctx {
- struct vhost_server *vserver;
+struct vhost_user_connection {
+ struct vhost_user_socket *vsocket;
int vid;
};
-#define MAX_VHOST_SERVER 1024
-struct _vhost_server {
- struct vhost_server *server[MAX_VHOST_SERVER];
+#define MAX_VHOST_SOCKET 1024
+struct vhost_user {
+ struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
struct fdset fdset;
- int vserver_cnt;
- pthread_mutex_t server_mutex;
+ int vsocket_cnt;
+ pthread_mutex_t mutex;
};
-static struct _vhost_server g_vhost_server = {
+#define MAX_VIRTIO_BACKLOG 128
+
+static void vhost_user_new_connection(int fd, void *data, int *remove);
+static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+
+static struct vhost_user vhost_user = {
.fdset = {
.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
- .vserver_cnt = 0,
- .server_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .vsocket_cnt = 0,
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
};
static const char *vhost_message_str[VHOST_USER_MAX] = {
@@ -278,13 +287,13 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
return ret;
}
-/* call back when there is new virtio connection. */
+/* call back when there is new vhost-user connection. */
static void
-vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
{
- struct vhost_server *vserver = (struct vhost_server *)dat;
+ struct vhost_user_socket *vsocket = dat;
int conn_fd;
- struct connfd_ctx *ctx;
+ struct vhost_user_connection *conn;
int vid;
unsigned int size;
@@ -294,41 +303,41 @@ vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
if (conn_fd < 0)
return;
- ctx = calloc(1, sizeof(*ctx));
- if (ctx == NULL) {
+ conn = calloc(1, sizeof(*conn));
+ if (conn == NULL) {
close(conn_fd);
return;
}
vid = vhost_new_device();
if (vid == -1) {
- free(ctx);
+ free(conn);
close(conn_fd);
return;
}
- size = strnlen(vserver->path, PATH_MAX);
- vhost_set_ifname(vid, vserver->path, size);
+ size = strnlen(vsocket->path, PATH_MAX);
+ vhost_set_ifname(vid, vsocket->path, size);
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
- ctx->vserver = vserver;
- ctx->vid = vid;
- fdset_add(&g_vhost_server.fdset,
- conn_fd, vserver_message_handler, NULL, ctx);
+ conn->vsocket = vsocket;
+ conn->vid = vid;
+ fdset_add(&vhost_user.fdset,
+ conn_fd, vhost_user_msg_handler, NULL, conn);
}
/* callback when there is message on the connfd */
static void
-vserver_message_handler(int connfd, void *dat, int *remove)
+vhost_user_msg_handler(int connfd, void *dat, int *remove)
{
int vid;
- struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+ struct vhost_user_connection *conn = dat;
struct VhostUserMsg msg;
uint64_t features;
int ret;
- vid = cfd_ctx->vid;
+ vid = conn->vid;
ret = read_vhost_message(connfd, &msg);
if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
if (ret < 0)
@@ -343,7 +352,7 @@ vserver_message_handler(int connfd, void *dat, int *remove)
close(connfd);
*remove = 1;
- free(cfd_ctx);
+ free(conn);
vhost_destroy_device(vid);
return;
@@ -449,37 +458,37 @@ vserver_message_handler(int connfd, void *dat, int *remove)
int
rte_vhost_driver_register(const char *path)
{
- struct vhost_server *vserver;
+ struct vhost_user_socket *vsocket;
- pthread_mutex_lock(&g_vhost_server.server_mutex);
+ pthread_mutex_lock(&vhost_user.mutex);
- if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
RTE_LOG(ERR, VHOST_CONFIG,
"error: the number of servers reaches maximum\n");
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver = calloc(sizeof(struct vhost_server), 1);
- if (vserver == NULL) {
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vsocket = calloc(sizeof(struct vhost_user_socket), 1);
+ if (vsocket == NULL) {
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver->listenfd = uds_socket(path);
- if (vserver->listenfd < 0) {
- free(vserver);
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vsocket->listenfd = uds_socket(path);
+ if (vsocket->listenfd < 0) {
+ free(vsocket);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver->path = strdup(path);
+ vsocket->path = strdup(path);
- fdset_add(&g_vhost_server.fdset, vserver->listenfd,
- vserver_new_vq_conn, NULL, vserver);
+ fdset_add(&vhost_user.fdset, vsocket->listenfd,
+ vhost_user_new_connection, NULL, vsocket);
- g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+ pthread_mutex_unlock(&vhost_user.mutex);
return 0;
}
@@ -494,28 +503,28 @@ rte_vhost_driver_unregister(const char *path)
int i;
int count;
- pthread_mutex_lock(&g_vhost_server.server_mutex);
+ pthread_mutex_lock(&vhost_user.mutex);
- for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
- if (!strcmp(g_vhost_server.server[i]->path, path)) {
- fdset_del(&g_vhost_server.fdset,
- g_vhost_server.server[i]->listenfd);
+ for (i = 0; i < vhost_user.vsocket_cnt; i++) {
+ if (!strcmp(vhost_user.vsockets[i]->path, path)) {
+ fdset_del(&vhost_user.fdset,
+ vhost_user.vsockets[i]->listenfd);
- close(g_vhost_server.server[i]->listenfd);
- free(g_vhost_server.server[i]->path);
- free(g_vhost_server.server[i]);
+ close(vhost_user.vsockets[i]->listenfd);
+ free(vhost_user.vsockets[i]->path);
+ free(vhost_user.vsockets[i]);
unlink(path);
- count = --g_vhost_server.vserver_cnt;
- g_vhost_server.server[i] = g_vhost_server.server[count];
- g_vhost_server.server[count] = NULL;
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ count = --vhost_user.vsocket_cnt;
+ vhost_user.vsockets[i] = vhost_user.vsockets[count];
+ vhost_user.vsockets[count] = NULL;
+ pthread_mutex_unlock(&vhost_user.mutex);
return 0;
}
}
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
@@ -523,6 +532,6 @@ rte_vhost_driver_unregister(const char *path)
int
rte_vhost_driver_session_start(void)
{
- fdset_event_dispatch(&g_vhost_server.fdset);
+ fdset_event_dispatch(&vhost_user.fdset);
return 0;
}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
index e3bb413..d902595 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -38,12 +38,6 @@
#include <linux/vhost.h>
#include "rte_virtio_net.h"
-#include "fd_man.h"
-
-struct vhost_server {
- char *path; /**< The path the uds is bind to. */
- int listenfd; /**< The listener sockfd. */
-};
/* refer to hw/virtio/vhost-user.c */
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
@ 2016-05-07 6:40 ` Yuanhan Liu
2016-05-09 10:33 ` Victor Kaplansky
2016-05-07 6:40 ` [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability Yuanhan Liu
` (5 subsequent siblings)
7 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
Add a new parameter (flags) to rte_vhost_driver_register(). DPDK
vhost-user acts in client mode when the RTE_VHOST_USER_CLIENT flag
is set.
The flags argument also allows future extensions without breaking the
API (again).
The rest is straightforward: create a unix socket, then bind/listen
for the server case, or connect for the client case.
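For clarity, the server/client split boils down to the following (a
simplified sketch condensing the two helpers added below; error handling
is trimmed and setup_one_socket() is just a hypothetical name):
    static int
    setup_one_socket(struct vhost_user_socket *vsocket)
    {
            struct sockaddr_un un;
            int fd = create_unix_socket(vsocket->path, &un, vsocket->is_server);

            if (fd < 0)
                    return -1;
            if (vsocket->is_server) {
                    /* server: we own the socket file; wait for QEMU to connect */
                    bind(fd, (struct sockaddr *)&un, sizeof(un));
                    listen(fd, MAX_VIRTIO_BACKLOG);
                    vsocket->listenfd = fd;
                    fdset_add(&vhost_user.fdset, fd,
                              vhost_user_server_new_connection, NULL, vsocket);
            } else {
                    /* client: QEMU owns the socket file; we connect to it */
                    connect(fd, (struct sockaddr *)&un, sizeof(un));
                    vhost_user_add_connection(fd, vsocket);
            }
            return 0;
    }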
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 2 +-
examples/vhost/main.c | 2 +-
lib/librte_vhost/rte_virtio_net.h | 11 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 215 ++++++++++++++++-----------
4 files changed, 142 insertions(+), 88 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a9dada5..36697cf 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -456,7 +456,7 @@ eth_dev_start(struct rte_eth_dev *dev)
int ret = 0;
if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
- ret = rte_vhost_driver_register(internal->iface_name);
+ ret = rte_vhost_driver_register(internal->iface_name, 0);
if (ret)
return ret;
}
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index bbf0d28..6899189 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1499,7 +1499,7 @@ main(int argc, char *argv[])
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
/* Register vhost(cuse or user) driver to handle vhost messages. */
- ret = rte_vhost_driver_register((char *)&dev_basename);
+ ret = rte_vhost_driver_register(dev_basename, 0);
if (ret != 0)
rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 4e50425..c84e7ab 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -51,6 +51,8 @@
#include <rte_mempool.h>
#include <rte_ether.h>
+#define RTE_VHOST_USER_CLIENT (1ULL << 0)
+
struct rte_mbuf;
#define VHOST_MEMORY_MAX_NREGIONS 8
@@ -96,11 +98,14 @@ uint64_t rte_vhost_feature_get(void);
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
-/* Register vhost driver. dev_name could be different for multiple instance support. */
-int rte_vhost_driver_register(const char *dev_name);
+/**
+ * Register vhost driver. path could be different for multiple
+ * instance support.
+ */
+int rte_vhost_driver_register(const char *path, uint64_t flags);
/* Unregister vhost driver. This is only meaningful to vhost user. */
-int rte_vhost_driver_unregister(const char *dev_name);
+int rte_vhost_driver_unregister(const char *path);
/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f485a3b..aa98717 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -58,6 +58,7 @@
struct vhost_user_socket {
char *path;
int listenfd;
+ int is_server;
};
struct vhost_user_connection {
@@ -75,7 +76,7 @@ struct vhost_user {
#define MAX_VIRTIO_BACKLOG 128
-static void vhost_user_new_connection(int fd, void *data, int *remove);
+static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
static struct vhost_user vhost_user = {
@@ -111,48 +112,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
};
-/**
- * Create a unix domain socket, bind to path and listen for connection.
- * @return
- * socket fd or -1 on failure
- */
-static int
-uds_socket(const char *path)
-{
- struct sockaddr_un un;
- int sockfd;
- int ret;
-
- if (path == NULL)
- return -1;
-
- sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sockfd < 0)
- return -1;
- RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
-
- memset(&un, 0, sizeof(un));
- un.sun_family = AF_UNIX;
- snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
- ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
- if (ret == -1) {
- RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
- sockfd, path);
- goto err;
- }
- RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
-
- ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
- if (ret == -1)
- goto err;
-
- return sockfd;
-
-err:
- close(sockfd);
- return -1;
-}
-
/* return bytes# of read on success or negative val on failure. */
static int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
@@ -287,32 +246,24 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
return ret;
}
-/* call back when there is new vhost-user connection. */
+
static void
-vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
+vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
- struct vhost_user_socket *vsocket = dat;
- int conn_fd;
- struct vhost_user_connection *conn;
int vid;
- unsigned int size;
-
- conn_fd = accept(fd, NULL, NULL);
- RTE_LOG(INFO, VHOST_CONFIG,
- "new virtio connection is %d\n", conn_fd);
- if (conn_fd < 0)
- return;
+ size_t size;
+ struct vhost_user_connection *conn;
- conn = calloc(1, sizeof(*conn));
+ conn = malloc(sizeof(*conn));
if (conn == NULL) {
- close(conn_fd);
+ close(fd);
return;
}
vid = vhost_new_device();
if (vid == -1) {
+ close(fd);
free(conn);
- close(conn_fd);
return;
}
@@ -323,8 +274,21 @@ vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
conn->vsocket = vsocket;
conn->vid = vid;
- fdset_add(&vhost_user.fdset,
- conn_fd, vhost_user_msg_handler, NULL, conn);
+ fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
+}
+
+/* call back when there is new vhost-user connection from client */
+static void
+vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
+{
+ struct vhost_user_socket *vsocket = dat;
+
+ fd = accept(fd, NULL, NULL);
+ if (fd < 0)
+ return;
+
+ RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
+ vhost_user_add_connection(fd, vsocket);
}
/* callback when there is message on the connfd */
@@ -452,50 +416,135 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
}
}
-/**
- * Creates and initialise the vhost server.
- */
-int
-rte_vhost_driver_register(const char *path)
+static int
+create_unix_socket(const char *path, struct sockaddr_un *un, int is_server)
{
- struct vhost_user_socket *vsocket;
+ int fd;
- pthread_mutex_lock(&vhost_user.mutex);
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -1;
+ RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
+ is_server ? "server" : "client", fd);
- if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "error: the number of servers reaches maximum\n");
- pthread_mutex_unlock(&vhost_user.mutex);
+ memset(un, 0, sizeof(*un));
+ un->sun_family = AF_UNIX;
+ strncpy(un->sun_path, path, sizeof(un->sun_path));
+
+ return fd;
+}
+
+static int
+vhost_user_create_server(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
return -1;
+
+ ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to bind to %s: %s; remove it and try again\n",
+ path, strerror(errno));
+ goto err;
}
+ RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
- vsocket = calloc(sizeof(struct vhost_user_socket), 1);
- if (vsocket == NULL) {
- pthread_mutex_unlock(&vhost_user.mutex);
+ ret = listen(fd, MAX_VIRTIO_BACKLOG);
+ if (ret < 0)
+ goto err;
+
+ vsocket->listenfd = fd;
+ fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
+ NULL, vsocket);
+
+ return 0;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static int
+vhost_user_create_client(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
+ return -1;
+
+ ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
+ path, strerror(errno));
+ close(fd);
return -1;
}
- vsocket->listenfd = uds_socket(path);
- if (vsocket->listenfd < 0) {
- free(vsocket);
- pthread_mutex_unlock(&vhost_user.mutex);
+ vhost_user_add_connection(fd, vsocket);
+
+ return 0;
+}
+
+/*
+ * Register a new vhost-user socket; here we could act as server
+ * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
+ * is set.
+ */
+int
+rte_vhost_driver_register(const char *path, uint64_t flags)
+{
+ int ret = -1;
+ struct vhost_user_socket *vsocket;
+
+ if (!path)
return -1;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: the number of vhost sockets reaches maximum\n");
+ goto out;
}
+ vsocket = malloc(sizeof(struct vhost_user_socket));
+ if (!vsocket)
+ goto out;
+ memset(vsocket, 0, sizeof(struct vhost_user_socket));
vsocket->path = strdup(path);
- fdset_add(&vhost_user.fdset, vsocket->listenfd,
- vhost_user_new_connection, NULL, vsocket);
+ if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ ret = vhost_user_create_client(vsocket);
+ } else {
+ vsocket->is_server = 1;
+ ret = vhost_user_create_server(vsocket);
+ }
+ if (ret < 0) {
+ free(vsocket->path);
+ free(vsocket);
+ goto out;
+ }
vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+
+out:
pthread_mutex_unlock(&vhost_user.mutex);
- return 0;
+ return ret;
}
-
/**
- * Unregister the specified vhost server
+ * Unregister the specified vhost socket
*/
int
rte_vhost_driver_unregister(const char *path)
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 2/6] vhost: add vhost-user " Yuanhan Liu
@ 2016-05-07 6:40 ` Yuanhan Liu
2016-05-09 16:47 ` Xie, Huawei
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
` (4 subsequent siblings)
7 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
Allow reconnecting on failure when both the RTE_VHOST_USER_RECONNECT and
RTE_VHOST_USER_CLIENT flags are set.
Reconnecting means two things here:
- when the DPDK app starts first and QEMU (the server) is not started yet,
without reconnecting the DPDK app would simply fail at vhost-user
registration.
- when QEMU reboots, without reconnecting you can't re-establish the
connection without restarting the DPDK app.
This patch makes both cases above work. It simply creates a new thread
that keeps calling connect() until it succeeds; see the sketch below.
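In essence, the retry loop added by this patch looks like this (a
simplified rendering of the hunk below):
    static void *
    vhost_user_client_reconnect(void *arg)
    {
            struct reconnect_info *reconn = arg;

            /* keep retrying until the server (QEMU) comes up */
            while (connect(reconn->fd, (struct sockaddr *)&reconn->un,
                           sizeof(reconn->un)) != 0)
                    sleep(1);

            /* connected: register the fd and start handling messages */
            vhost_user_add_connection(reconn->fd, reconn->vsocket);
            free(reconn);
            return NULL;
    }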
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
lib/librte_vhost/rte_virtio_net.h | 1 +
lib/librte_vhost/vhost_user/vhost-net-user.c | 63 +++++++++++++++++++++++++---
2 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index c84e7ab..f354d52 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -52,6 +52,7 @@
#include <rte_ether.h>
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
+#define RTE_VHOST_USER_RECONNECT (1ULL << 1)
struct rte_mbuf;
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index aa98717..07bce6e 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -59,6 +59,7 @@ struct vhost_user_socket {
char *path;
int listenfd;
int is_server;
+ int reconnect;
};
struct vhost_user_connection {
@@ -78,6 +79,7 @@ struct vhost_user {
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static int vhost_user_create_client(struct vhost_user_socket *vsocket);
static struct vhost_user vhost_user = {
.fdset = {
@@ -304,6 +306,8 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
vid = conn->vid;
ret = read_vhost_message(connfd, &msg);
if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+ struct vhost_user_socket *vsocket = conn->vsocket;
+
if (ret < 0)
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read message failed\n");
@@ -319,6 +323,9 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
free(conn);
vhost_destroy_device(vid);
+ if (vsocket->reconnect)
+ vhost_user_create_client(vsocket);
+
return;
}
@@ -470,6 +477,33 @@ err:
return -1;
}
+struct reconnect_info {
+ struct sockaddr_un un;
+ int fd;
+ struct vhost_user_socket *vsocket;
+};
+
+static void *
+vhost_user_client_reconnect(void *arg)
+{
+ struct reconnect_info *reconn = arg;
+ int ret;
+
+ RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
+ while (1) {
+ ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
+ sizeof(reconn->un));
+ if (ret == 0)
+ break;
+ sleep(1);
+ }
+
+ vhost_user_add_connection(reconn->fd, reconn->vsocket);
+ free(reconn);
+
+ return NULL;
+}
+
static int
vhost_user_create_client(struct vhost_user_socket *vsocket)
{
@@ -477,22 +511,40 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
int ret;
struct sockaddr_un un;
const char *path = vsocket->path;
+ struct reconnect_info *reconn;
+ pthread_t tid;
fd = create_unix_socket(path, &un, vsocket->is_server);
if (fd < 0)
return -1;
ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
- path, strerror(errno));
+ if (ret == 0) {
+ vhost_user_add_connection(fd, vsocket);
+ return 0;
+ }
+
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to connect to %s: %s\n",
+ path, strerror(errno));
+
+ if (!vsocket->reconnect) {
close(fd);
return -1;
}
- vhost_user_add_connection(fd, vsocket);
+ /* Create a thread to try reconnecting, to not block the caller. */
+ reconn = malloc(sizeof(*reconn));
+ reconn->un = un;
+ reconn->fd = fd;
+ reconn->vsocket = vsocket;
+ ret = pthread_create(&tid, NULL, vhost_user_client_reconnect, reconn);
+ if (ret < 0) {
+ close(fd);
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
+ }
- return 0;
+ return ret;
}
/*
@@ -524,6 +576,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->path = strdup(path);
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ vsocket->reconnect = !!(flags & RTE_VHOST_USER_RECONNECT);
ret = vhost_user_create_client(vsocket);
} else {
vsocket->is_server = 1;
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (2 preceding siblings ...)
2016-05-07 6:40 ` [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability Yuanhan Liu
@ 2016-05-07 6:40 ` Yuanhan Liu
2016-05-09 10:45 ` Victor Kaplansky
` (3 more replies)
2016-05-07 6:40 ` [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
` (3 subsequent siblings)
7 siblings, 4 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu, Michael S. Tsirkin
When the DPDK app crashes (or quits, or gets killed), and when QEMU
supports reconnecting (patches have been sent, but are not merged yet), a
restart of the DPDK app would get a stale vring base from QEMU. That would
break the kernel virtio net driver completely, making it stop working,
unless a driver reset is done.
So, instead of taking the stale vring base from QEMU, Huawei suggested we
could derive a proper one from used->idx.
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Suggested-by: "Xie, Huawei" <huawei.xie@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
Note that the right way to handle reconnect after an abnormal quit would
be to let the guest driver initiate a reset when a reconnect is detected
by QEMU. A reset from the virtio net driver would reset all virtio-related
metadata and trigger the vhost-user re-initiation again; therefore, it
would make the reconnect work as expected.
What's unfortunate is that a driver reset on reconnect, as the term
"driver" implies, needs driver hacking, whether in the Linux kernel virtio
net driver, the DPDK PMD driver, or even the Windows driver. Apparently,
it will not work for now, or even for a long while, until we have adapted
to the new kernel.
The fix (or more precisely, the workaround) from this patch would make
reconnect work in most cases, including the following two hypothetical
cases that might corrupt virtio memory:
- vring_used_elem might be in a stale state when we are in the middle of
updating the used vring. Say, we might have updated the "id" field but
left "len" untouched.
- the vring desc buffer might also be in a stale state, when we are
copying data into it.
The reason it still works is that we haven't updated used->idx yet, which
means the guest driver will not start processing those partially written
used entries. Therefore, no issues.
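Schematically, it is the used ring update order that saves us here
(flush_used_ring() is a hypothetical name; this only illustrates the idea,
it is not the actual rte_vhost code):
    static void
    flush_used_ring(struct vhost_virtqueue *vq, uint16_t used_idx,
                    uint32_t desc_idx, uint32_t len, uint16_t count)
    {
            /* 1. fill the used ring entries; a crash here may leave them
             * half-written */
            vq->used->ring[used_idx].id  = desc_idx;
            vq->used->ring[used_idx].len = len;

            /* make the entries visible before the index update */
            rte_smp_wmb();

            /* 2. only this store makes the new entries visible to the guest */
            vq->used->idx += count;
    }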
However, Michael raised a concern, and he made a good point: a crash
means some memory is corrupted, and it could be the virtio memory that is
corrupted. In such a case, nothing will work without the reset.
But I would say that an app in production use would rarely crash, and
even if it does crash, the chance of the virtio memory being corrupted is
relatively small. Besides that, it would work when, say, the app is killed
by ctrl-c or the kill command. So, it works in most cases. But still, I
would say it's just a workaround so far.
On the other hand, without this patch, reconnect after an abnormal quit
would not work at all. With this patch, it works in most cases; it only
fails in the rare case where virtio memory is corrupted. Therefore, I'd
say it is still good to have, until we have a perfect solution.
---
lib/librte_vhost/virtio-net.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index c88aaa3..df103aa 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -560,6 +560,14 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
return -1;
}
+ if (vq->last_used_idx != vq->used->idx) {
+ RTE_LOG(WARNING, VHOST_CONFIG,
+ "last_used_idx (%u) and vq->used->idx (%u) mismatch\n",
+ vq->last_used_idx, vq->used->idx);
+ vq->last_used_idx = vq->used->idx;
+ vq->last_used_idx_res = vq->used->idx;
+ }
+
vq->log_guest_addr = addr->log_guest_addr;
LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (3 preceding siblings ...)
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
@ 2016-05-07 6:40 ` Yuanhan Liu
2016-05-09 10:47 ` Victor Kaplansky
2016-05-07 6:40 ` [dpdk-dev] [PATCH 6/6] vhost: add pmd " Yuanhan Liu
` (2 subsequent siblings)
7 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu
Add --client and --reconnect options to enable client mode and reconnect
mode, respectively. --reconnect works only when --client is given as
well.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
examples/vhost/main.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 6899189..26c4d5f 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -132,6 +132,9 @@ static uint32_t enable_tx_csum;
/* Disable TSO offload */
static uint32_t enable_tso;
+static int client_mode;
+static int reconnect;
+
/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
@@ -459,7 +462,9 @@ us_vhost_usage(const char *prgname)
" --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
" --dev-basename: The basename to be used for the character device.\n"
" --tx-csum [0|1] disable/enable TX checksum offload.\n"
- " --tso [0|1] disable/enable TCP segment offload.\n",
+ " --tso [0|1] disable/enable TCP segment offload.\n"
+ " --client register a vhost-user socket as client mode.\n"
+ " --reconnect reconnect to vhost-user server when disconnects.\n",
prgname);
}
@@ -484,6 +489,8 @@ us_vhost_parse_args(int argc, char **argv)
{"dev-basename", required_argument, NULL, 0},
{"tx-csum", required_argument, NULL, 0},
{"tso", required_argument, NULL, 0},
+ {"client", no_argument, &client_mode, 1},
+ {"reconnect", no_argument, &reconnect, 1},
{NULL, 0, 0, 0},
};
@@ -647,6 +654,12 @@ us_vhost_parse_args(int argc, char **argv)
}
}
+ if (reconnect && !client_mode) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "--reconnect works only when --client is specified\n");
+ return -1;
+ }
+
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
if (enabled_port_mask & (1 << i))
ports[num_ports++] = (uint8_t)i;
@@ -1406,6 +1419,7 @@ main(int argc, char *argv[])
uint8_t portid;
static pthread_t tid;
char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ uint64_t flags = 0;
signal(SIGINT, sigint_handler);
@@ -1498,8 +1512,13 @@ main(int argc, char *argv[])
if (mergeable == 0)
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
+ if (client_mode)
+ flags |= RTE_VHOST_USER_CLIENT;
+ if (reconnect)
+ flags |= RTE_VHOST_USER_RECONNECT;
+
/* Register vhost(cuse or user) driver to handle vhost messages. */
- ret = rte_vhost_driver_register(dev_basename, 0);
+ ret = rte_vhost_driver_register(dev_basename, flags);
if (ret != 0)
rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH 6/6] vhost: add pmd client and reconnect option
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (4 preceding siblings ...)
2016-05-07 6:40 ` [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
@ 2016-05-07 6:40 ` Yuanhan Liu
2016-05-09 10:54 ` Victor Kaplansky
2016-05-10 3:23 ` [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Xu, Qian Q
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
7 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-07 6:40 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Yuanhan Liu, Tetsuya Mukawa
Add client and reconnect options to the vhost PMD. reconnect works only
when client is given as well.
Cc: Tetsuya Mukawa <mukawa@igel.co.jp>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 54 ++++++++++++++++++++++++++++++---------
1 file changed, 42 insertions(+), 12 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 36697cf..7636ef8 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -47,12 +47,16 @@
#define ETH_VHOST_IFACE_ARG "iface"
#define ETH_VHOST_QUEUES_ARG "queues"
+#define ETH_VHOST_CLIENT_ARG "client"
+#define ETH_VHOST_RECONNECT_ARG "reconnect"
static const char *drivername = "VHOST PMD";
static const char *valid_arguments[] = {
ETH_VHOST_IFACE_ARG,
ETH_VHOST_QUEUES_ARG,
+ ETH_VHOST_CLIENT_ARG,
+ ETH_VHOST_RECONNECT_ARG,
NULL
};
@@ -87,6 +91,7 @@ struct pmd_internal {
char *dev_name;
char *iface_name;
uint16_t max_queues;
+ uint64_t flags;
volatile uint16_t once;
};
@@ -456,7 +461,8 @@ eth_dev_start(struct rte_eth_dev *dev)
int ret = 0;
if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
- ret = rte_vhost_driver_register(internal->iface_name, 0);
+ ret = rte_vhost_driver_register(internal->iface_name,
+ internal->flags);
if (ret)
return ret;
}
@@ -661,7 +667,7 @@ static const struct eth_dev_ops ops = {
static int
eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
- const unsigned numa_node)
+ const unsigned numa_node, uint64_t flags)
{
struct rte_eth_dev_data *data = NULL;
struct pmd_internal *internal = NULL;
@@ -718,6 +724,7 @@ eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
internal->iface_name = strdup(iface_name);
if (internal->iface_name == NULL)
goto error;
+ internal->flags = flags;
list->eth_dev = eth_dev;
pthread_mutex_lock(&internal_list_lock);
@@ -782,18 +789,15 @@ open_iface(const char *key __rte_unused, const char *value, void *extra_args)
}
static inline int
-open_queues(const char *key __rte_unused, const char *value, void *extra_args)
+open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
- uint16_t *q = extra_args;
+ uint16_t *n = extra_args;
if (value == NULL || extra_args == NULL)
return -EINVAL;
- *q = (uint16_t)strtoul(value, NULL, 0);
- if (*q == USHRT_MAX && errno == ERANGE)
- return -1;
-
- if (*q > RTE_MAX_QUEUES_PER_PORT)
+ *n = (uint16_t)strtoul(value, NULL, 0);
+ if (*n == USHRT_MAX && errno == ERANGE)
return -1;
return 0;
@@ -806,6 +810,9 @@ rte_pmd_vhost_devinit(const char *name, const char *params)
int ret = 0;
char *iface_name;
uint16_t queues;
+ uint64_t flags = 0;
+ int client_mode;
+ int reconnect;
RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);
@@ -825,14 +832,37 @@ rte_pmd_vhost_devinit(const char *name, const char *params)
if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
- &open_queues, &queues);
- if (ret < 0)
+ &open_int, &queues);
+ if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
goto out_free;
} else
queues = 1;
- eth_dev_vhost_create(name, iface_name, queues, rte_socket_id());
+ if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
+ &open_int, &client_mode);
+ if (ret < 0)
+ goto out_free;
+ }
+ if (rte_kvargs_count(kvlist, ETH_VHOST_RECONNECT_ARG) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_RECONNECT_ARG,
+ &open_int, &reconnect);
+ if (ret < 0)
+ goto out_free;
+ }
+ if (client_mode)
+ flags |= RTE_VHOST_USER_CLIENT;
+ if (reconnect)
+ flags |= RTE_VHOST_USER_RECONNECT;
+ if (reconnect && !client_mode) {
+ RTE_LOG(ERR, PMD,
+ "reconnect works only when client is specified\n");
+ ret = -1;
+ goto out_free;
+ }
+
+ eth_dev_vhost_create(name, iface_name, queues, rte_socket_id(), flags);
out_free:
rte_kvargs_free(kvlist);
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
2016-05-07 6:40 ` [dpdk-dev] [PATCH 2/6] vhost: add vhost-user " Yuanhan Liu
@ 2016-05-09 10:33 ` Victor Kaplansky
2016-05-09 20:33 ` Yuanhan Liu
0 siblings, 1 reply; 50+ messages in thread
From: Victor Kaplansky @ 2016-05-09 10:33 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei xie, Michael S. Tsirkin
Adding a flag for a future extension seems fine to me.
Could we manage without adding a flag?
For example, could we always try client mode for a while at first,
and if unsuccessful, then come up in server mode?
-- Victor
----- Original Message -----
> From: "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> To: dev@dpdk.org
> Cc: "huawei xie" <huawei.xie@intel.com>, "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> Sent: Saturday, May 7, 2016 9:40:20 AM
> Subject: [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
>
> Add a new parameter (flags) to rte_vhost_driver_register(). DPDK
> vhost-user acts in client mode when the RTE_VHOST_USER_CLIENT flag
> is set.
>
> The flags would also allow future extensions without breaking the
> API (again).
>
> The rest is straightforward: create a unix socket, and
> bind/listen for server, connect for client.
>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
> drivers/net/vhost/rte_eth_vhost.c | 2 +-
> examples/vhost/main.c | 2 +-
> lib/librte_vhost/rte_virtio_net.h | 11 +-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 215
> ++++++++++++++++-----------
> 4 files changed, 142 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index a9dada5..36697cf 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -456,7 +456,7 @@ eth_dev_start(struct rte_eth_dev *dev)
> int ret = 0;
>
> if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
> - ret = rte_vhost_driver_register(internal->iface_name);
> + ret = rte_vhost_driver_register(internal->iface_name, 0);
> if (ret)
> return ret;
> }
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index bbf0d28..6899189 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -1499,7 +1499,7 @@ main(int argc, char *argv[])
> rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
>
> /* Register vhost(cuse or user) driver to handle vhost messages. */
> - ret = rte_vhost_driver_register((char *)&dev_basename);
> + ret = rte_vhost_driver_register(dev_basename, 0);
> if (ret != 0)
> rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
>
> diff --git a/lib/librte_vhost/rte_virtio_net.h
> b/lib/librte_vhost/rte_virtio_net.h
> index 4e50425..c84e7ab 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -51,6 +51,8 @@
> #include <rte_mempool.h>
> #include <rte_ether.h>
>
> +#define RTE_VHOST_USER_CLIENT (1ULL << 0)
> +
> struct rte_mbuf;
>
> #define VHOST_MEMORY_MAX_NREGIONS 8
> @@ -96,11 +98,14 @@ uint64_t rte_vhost_feature_get(void);
>
> int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int
> enable);
>
> -/* Register vhost driver. dev_name could be different for multiple instance
> support. */
> -int rte_vhost_driver_register(const char *dev_name);
> +/**
> + * Register vhost driver. path could be different for multiple
> + * instance support.
> + */
> +int rte_vhost_driver_register(const char *path, uint64_t flags);
>
> /* Unregister vhost driver. This is only meaningful to vhost user. */
> -int rte_vhost_driver_unregister(const char *dev_name);
> +int rte_vhost_driver_unregister(const char *path);
>
> /* Register callbacks. */
> int rte_vhost_driver_callback_register(struct virtio_net_device_ops const *
> const);
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c
> b/lib/librte_vhost/vhost_user/vhost-net-user.c
> index f485a3b..aa98717 100644
> --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -58,6 +58,7 @@
> struct vhost_user_socket {
> char *path;
> int listenfd;
> + int is_server;
> };
>
> struct vhost_user_connection {
> @@ -75,7 +76,7 @@ struct vhost_user {
>
> #define MAX_VIRTIO_BACKLOG 128
>
> -static void vhost_user_new_connection(int fd, void *data, int *remove);
> +static void vhost_user_server_new_connection(int fd, void *data, int
> *remove);
> static void vhost_user_msg_handler(int fd, void *dat, int *remove);
>
> static struct vhost_user vhost_user = {
> @@ -111,48 +112,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
> [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
> };
>
> -/**
> - * Create a unix domain socket, bind to path and listen for connection.
> - * @return
> - * socket fd or -1 on failure
> - */
> -static int
> -uds_socket(const char *path)
> -{
> - struct sockaddr_un un;
> - int sockfd;
> - int ret;
> -
> - if (path == NULL)
> - return -1;
> -
> - sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> - if (sockfd < 0)
> - return -1;
> - RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> -
> - memset(&un, 0, sizeof(un));
> - un.sun_family = AF_UNIX;
> - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> - ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> - if (ret == -1) {
> - RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try
> again.\n",
> - sockfd, path);
> - goto err;
> - }
> - RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> -
> - ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
> - if (ret == -1)
> - goto err;
> -
> - return sockfd;
> -
> -err:
> - close(sockfd);
> - return -1;
> -}
> -
> /* return bytes# of read on success or negative val on failure. */
> static int
> read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> @@ -287,32 +246,24 @@ send_vhost_message(int sockfd, struct VhostUserMsg
> *msg)
> return ret;
> }
>
> -/* call back when there is new vhost-user connection. */
> +
> static void
> -vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
> +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
> {
> - struct vhost_user_socket *vsocket = dat;
> - int conn_fd;
> - struct vhost_user_connection *conn;
> int vid;
> - unsigned int size;
> -
> - conn_fd = accept(fd, NULL, NULL);
> - RTE_LOG(INFO, VHOST_CONFIG,
> - "new virtio connection is %d\n", conn_fd);
> - if (conn_fd < 0)
> - return;
> + size_t size;
> + struct vhost_user_connection *conn;
>
> - conn = calloc(1, sizeof(*conn));
> + conn = malloc(sizeof(*conn));
> if (conn == NULL) {
> - close(conn_fd);
> + close(fd);
> return;
> }
>
> vid = vhost_new_device();
> if (vid == -1) {
> + close(fd);
> free(conn);
> - close(conn_fd);
> return;
> }
>
> @@ -323,8 +274,21 @@ vhost_user_new_connection(int fd, void *dat, int *remove
> __rte_unused)
>
> conn->vsocket = vsocket;
> conn->vid = vid;
> - fdset_add(&vhost_user.fdset,
> - conn_fd, vhost_user_msg_handler, NULL, conn);
> + fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
> +}
> +
> +/* call back when there is new vhost-user connection from client */
> +static void
> +vhost_user_server_new_connection(int fd, void *dat, int *remove
> __rte_unused)
> +{
> + struct vhost_user_socket *vsocket = dat;
> +
> + fd = accept(fd, NULL, NULL);
> + if (fd < 0)
> + return;
> +
> + RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
> + vhost_user_add_connection(fd, vsocket);
> }
>
> /* callback when there is message on the connfd */
> @@ -452,50 +416,135 @@ vhost_user_msg_handler(int connfd, void *dat, int
> *remove)
> }
> }
>
> -/**
> - * Creates and initialise the vhost server.
> - */
> -int
> -rte_vhost_driver_register(const char *path)
> +static int
> +create_unix_socket(const char *path, struct sockaddr_un *un, int is_server)
> {
> - struct vhost_user_socket *vsocket;
> + int fd;
>
> - pthread_mutex_lock(&vhost_user.mutex);
> + fd = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (fd < 0)
> + return -1;
> + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
> + is_server ? "server" : "client", fd);
>
> - if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "error: the number of servers reaches maximum\n");
> - pthread_mutex_unlock(&vhost_user.mutex);
> + memset(un, 0, sizeof(*un));
> + un->sun_family = AF_UNIX;
> + strncpy(un->sun_path, path, sizeof(un->sun_path));
> +
> + return fd;
> +}
> +
> +static int
> +vhost_user_create_server(struct vhost_user_socket *vsocket)
> +{
> + int fd;
> + int ret;
> + struct sockaddr_un un;
> + const char *path = vsocket->path;
> +
> + fd = create_unix_socket(path, &un, vsocket->is_server);
> + if (fd < 0)
> return -1;
> +
> + ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "failed to bind to %s: %s; remove it and try again\n",
> + path, strerror(errno));
> + goto err;
> }
> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
>
> - vsocket = calloc(sizeof(struct vhost_user_socket), 1);
> - if (vsocket == NULL) {
> - pthread_mutex_unlock(&vhost_user.mutex);
> + ret = listen(fd, MAX_VIRTIO_BACKLOG);
> + if (ret < 0)
> + goto err;
> +
> + vsocket->listenfd = fd;
> + fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
> + NULL, vsocket);
> +
> + return 0;
> +
> +err:
> + close(fd);
> + return -1;
> +}
> +
> +static int
> +vhost_user_create_client(struct vhost_user_socket *vsocket)
> +{
> + int fd;
> + int ret;
> + struct sockaddr_un un;
> + const char *path = vsocket->path;
> +
> + fd = create_unix_socket(path, &un, vsocket->is_server);
> + if (fd < 0)
> + return -1;
> +
> + ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
> + path, strerror(errno));
> + close(fd);
> return -1;
> }
>
> - vsocket->listenfd = uds_socket(path);
> - if (vsocket->listenfd < 0) {
> - free(vsocket);
> - pthread_mutex_unlock(&vhost_user.mutex);
> + vhost_user_add_connection(fd, vsocket);
> +
> + return 0;
> +}
> +
> +/*
> + * Register a new vhost-user socket; here we could act as server
> + * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
> + * is set.
> + */
> +int
> +rte_vhost_driver_register(const char *path, uint64_t flags)
> +{
> + int ret = -1;
> + struct vhost_user_socket *vsocket;
> +
> + if (!path)
> return -1;
> +
> + pthread_mutex_lock(&vhost_user.mutex);
> +
> + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "error: the number of vhost sockets reaches maximum\n");
> + goto out;
> }
>
> + vsocket = malloc(sizeof(struct vhost_user_socket));
> + if (!vsocket)
> + goto out;
> + memset(vsocket, 0, sizeof(struct vhost_user_socket));
> vsocket->path = strdup(path);
>
> - fdset_add(&vhost_user.fdset, vsocket->listenfd,
> - vhost_user_new_connection, NULL, vsocket);
> + if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
> + ret = vhost_user_create_client(vsocket);
> + } else {
> + vsocket->is_server = 1;
> + ret = vhost_user_create_server(vsocket);
> + }
> + if (ret < 0) {
> + free(vsocket->path);
> + free(vsocket);
> + goto out;
> + }
>
> vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
> +
> +out:
> pthread_mutex_unlock(&vhost_user.mutex);
>
> - return 0;
> + return ret;
> }
>
> -
> /**
> - * Unregister the specified vhost server
> + * Unregister the specified vhost socket
> */
> int
> rte_vhost_driver_unregister(const char *path)
> --
> 1.9.0
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
@ 2016-05-09 10:45 ` Victor Kaplansky
2016-05-09 13:39 ` Xie, Huawei
2016-05-09 12:19 ` Michael S. Tsirkin
` (2 subsequent siblings)
3 siblings, 1 reply; 50+ messages in thread
From: Victor Kaplansky @ 2016-05-09 10:45 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei xie, Michael S. Tsirkin
----- Original Message -----
> From: "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> To: dev@dpdk.org
> Cc: "huawei xie" <huawei.xie@intel.com>, "Yuanhan Liu" <yuanhan.liu@linux.intel.com>, "Michael S. Tsirkin"
> <mst@redhat.com>
> Sent: Saturday, May 7, 2016 9:40:22 AM
> Subject: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
>
> When DPDK app crashes (or quits, or gets killed), and when QEMU supports
> reconnecting (patches have been sent, not merged yet), a restart of DPDK
> app would get stale vring base from QEMU. That would break the kernel
> virtio net completely, making it not work any more, unless a driver reset
> is done.
>
> So, instead of getting the stale vring base from QEMU, Huawei suggested
> we could get a proper one from used->idx.
>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Suggested-by: "Xie, Huawei" <huawei.xie@intel.com>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
>
> Note that the right way to handle reconnect after an abnormal quit would be
> to let the guest driver initiate a reset when a reconnect is detected
> from QEMU. As a reset from the virtio net driver would reset all virtio
> related metadata, and trigger the vhost-user re-initiation again,
> therefore, it would make the reconnect work as expected.
>
> What's unfortunate is that a driver reset on reconnect, as the term "driver"
> implies, needs driver hacking, which could be a Linux kernel virtio net
> driver, DPDK pmd driver, or even, the windows driver. Apparently, it will
> not work so far, or even for a long while, until we are adapted to the
> new kernel.
>
> The fix (or more precisely, the workaround) from this patch would make
> reconnect work in most cases, including the following two hypotheses that
> might corrupt virtio memory:
>
> - vring_used_elem might be in stale state when we are in the middle of
> updating used vring. Say, we might have updated the "id" field, but
> leaving "len" untouched.
>
> - vring desc buff might also be in stale state, when we are copying
> data into it.
>
> The reason it still works is that we haven't updated used->idx yet,
> which means guest driver will not start processing those buggy used
> entries. Therefore, no issues.
>
> However, Michael raised some concerns, and he made a good point: a crash
> means some memory is corrupted, and it could be the virtio
> memory being corrupted. In such case, nothing will work without the
> reset.
>
> But I would say that an app in production use would rarely crash, and even
> if it does crash, the chance of the virtio memory being corrupted would be
> relatively small then. Besides that, it would work, say when the app
> is killed by ctrl-c or the kill command. So, it works in most cases.
> But still, I would say it's just a workaround so far.
>
> On the other hand, without this patch, it would be 100% not working
> after an abnormal quit. But with this patch, it works in most cases; it
> just doesn't work in the rare case when virtio memory is corrupted. Therefore,
> I'd say, it is still good to have, until we have a perfect solution.
> ---
> lib/librte_vhost/virtio-net.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index c88aaa3..df103aa 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -560,6 +560,14 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr
> *addr)
> return -1;
> }
>
> + if (vq->last_used_idx != vq->used->idx) {
> + RTE_LOG(WARNING, VHOST_CONFIG,
> + "last_used_idx (%u) and vq->used->idx (%u) mismatch\n",
I agree with this approach. I would just add to the log message that
last_used_idx has been overridden by used->idx and that some packets may be dropped.
> + vq->last_used_idx, vq->used->idx);
> + vq->last_used_idx = vq->used->idx;
> + vq->last_used_idx_res = vq->used->idx;
> + }
> +
> vq->log_guest_addr = addr->log_guest_addr;
>
> LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
> --
> 1.9.0
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option
2016-05-07 6:40 ` [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
@ 2016-05-09 10:47 ` Victor Kaplansky
0 siblings, 0 replies; 50+ messages in thread
From: Victor Kaplansky @ 2016-05-09 10:47 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei xie, Michael S. Tsirkin
Again, it may be useful to add a mixed --client-server option, where the
backend tries to connect as a client and, if that fails, comes up as a server...
Just a suggestion from a user's point of view...
----- Original Message -----
> From: "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> To: dev@dpdk.org
> Cc: "huawei xie" <huawei.xie@intel.com>, "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> Sent: Saturday, May 7, 2016 9:40:23 AM
> Subject: [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option
>
> Add --client and --reconnect option to enable the client mode and
> reconnect mode, respectively. --reconnect works only when --client
> is given as well.
>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
> examples/vhost/main.c | 23 +++++++++++++++++++++--
> 1 file changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index 6899189..26c4d5f 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -132,6 +132,9 @@ static uint32_t enable_tx_csum;
> /* Disable TSO offload */
> static uint32_t enable_tso;
>
> +static int client_mode;
> +static int reconnect;
> +
> /* Specify timeout (in useconds) between retries on RX. */
> static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
> /* Specify the number of retries on RX. */
> @@ -459,7 +462,9 @@ us_vhost_usage(const char *prgname)
> " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
> " --dev-basename: The basename to be used for the character device.\n"
> " --tx-csum [0|1] disable/enable TX checksum offload.\n"
> - " --tso [0|1] disable/enable TCP segment offload.\n",
> + " --tso [0|1] disable/enable TCP segment offload.\n"
> + " --client register a vhost-user socket as client mode.\n"
> + " --reconnect reconnect to vhost-user server when disconnects.\n",
> prgname);
> }
>
> @@ -484,6 +489,8 @@ us_vhost_parse_args(int argc, char **argv)
> {"dev-basename", required_argument, NULL, 0},
> {"tx-csum", required_argument, NULL, 0},
> {"tso", required_argument, NULL, 0},
> + {"client", no_argument, &client_mode, 1},
> + {"reconnect", no_argument, &reconnect, 1},
> {NULL, 0, 0, 0},
> };
>
> @@ -647,6 +654,12 @@ us_vhost_parse_args(int argc, char **argv)
> }
> }
>
> + if (reconnect && !client_mode) {
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "--reconnect works only when --client is specified\n");
> + return -1;
> + }
> +
> for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> if (enabled_port_mask & (1 << i))
> ports[num_ports++] = (uint8_t)i;
> @@ -1406,6 +1419,7 @@ main(int argc, char *argv[])
> uint8_t portid;
> static pthread_t tid;
> char thread_name[RTE_MAX_THREAD_NAME_LEN];
> + uint64_t flags = 0;
>
> signal(SIGINT, sigint_handler);
>
> @@ -1498,8 +1512,13 @@ main(int argc, char *argv[])
> if (mergeable == 0)
> rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
>
> + if (client_mode)
> + flags |= RTE_VHOST_USER_CLIENT;
> + if (reconnect)
> + flags |= RTE_VHOST_USER_RECONNECT;
> +
> /* Register vhost(cuse or user) driver to handle vhost messages. */
> - ret = rte_vhost_driver_register(dev_basename, 0);
> + ret = rte_vhost_driver_register(dev_basename, flags);
> if (ret != 0)
> rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
>
> --
> 1.9.0
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 6/6] vhost: add pmd client and reconnect option
2016-05-07 6:40 ` [dpdk-dev] [PATCH 6/6] vhost: add pmd " Yuanhan Liu
@ 2016-05-09 10:54 ` Victor Kaplansky
2016-05-09 18:26 ` Yuanhan Liu
0 siblings, 1 reply; 50+ messages in thread
From: Victor Kaplansky @ 2016-05-09 10:54 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei xie, Tetsuya Mukawa
Looks OK to me. I didn't quite get why open_int() is named so.
What does it open?
--
Victor
----- Original Message -----
> From: "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> To: dev@dpdk.org
> Cc: "huawei xie" <huawei.xie@intel.com>, "Yuanhan Liu" <yuanhan.liu@linux.intel.com>, "Tetsuya Mukawa"
> <mukawa@igel.co.jp>
> Sent: Saturday, May 7, 2016 9:40:24 AM
> Subject: [dpdk-dev] [PATCH 6/6] vhost: add pmd client and reconnect option
>
> Add client and reconnect options to the vhost PMD. reconnect only works when
> client is given as well.
>
> Cc: Tetsuya Mukawa <mukawa@igel.co.jp>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
> drivers/net/vhost/rte_eth_vhost.c | 54
> ++++++++++++++++++++++++++++++---------
> 1 file changed, 42 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index 36697cf..7636ef8 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -47,12 +47,16 @@
>
> #define ETH_VHOST_IFACE_ARG "iface"
> #define ETH_VHOST_QUEUES_ARG "queues"
> +#define ETH_VHOST_CLIENT_ARG "client"
> +#define ETH_VHOST_RECONNECT_ARG "reconnect"
>
> static const char *drivername = "VHOST PMD";
>
> static const char *valid_arguments[] = {
> ETH_VHOST_IFACE_ARG,
> ETH_VHOST_QUEUES_ARG,
> + ETH_VHOST_CLIENT_ARG,
> + ETH_VHOST_RECONNECT_ARG,
> NULL
> };
>
> @@ -87,6 +91,7 @@ struct pmd_internal {
> char *dev_name;
> char *iface_name;
> uint16_t max_queues;
> + uint64_t flags;
>
> volatile uint16_t once;
> };
> @@ -456,7 +461,8 @@ eth_dev_start(struct rte_eth_dev *dev)
> int ret = 0;
>
> if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
> - ret = rte_vhost_driver_register(internal->iface_name, 0);
> + ret = rte_vhost_driver_register(internal->iface_name,
> + internal->flags);
> if (ret)
> return ret;
> }
> @@ -661,7 +667,7 @@ static const struct eth_dev_ops ops = {
>
> static int
> eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
> - const unsigned numa_node)
> + const unsigned numa_node, uint64_t flags)
> {
> struct rte_eth_dev_data *data = NULL;
> struct pmd_internal *internal = NULL;
> @@ -718,6 +724,7 @@ eth_dev_vhost_create(const char *name, char *iface_name,
> int16_t queues,
> internal->iface_name = strdup(iface_name);
> if (internal->iface_name == NULL)
> goto error;
> + internal->flags = flags;
>
> list->eth_dev = eth_dev;
> pthread_mutex_lock(&internal_list_lock);
> @@ -782,18 +789,15 @@ open_iface(const char *key __rte_unused, const char
> *value, void *extra_args)
> }
>
> static inline int
> -open_queues(const char *key __rte_unused, const char *value, void
> *extra_args)
> +open_int(const char *key __rte_unused, const char *value, void *extra_args)
> {
> - uint16_t *q = extra_args;
> + uint16_t *n = extra_args;
>
> if (value == NULL || extra_args == NULL)
> return -EINVAL;
>
> - *q = (uint16_t)strtoul(value, NULL, 0);
> - if (*q == USHRT_MAX && errno == ERANGE)
> - return -1;
> -
> - if (*q > RTE_MAX_QUEUES_PER_PORT)
> + *n = (uint16_t)strtoul(value, NULL, 0);
> + if (*n == USHRT_MAX && errno == ERANGE)
> return -1;
>
> return 0;
> @@ -806,6 +810,9 @@ rte_pmd_vhost_devinit(const char *name, const char
> *params)
> int ret = 0;
> char *iface_name;
> uint16_t queues;
> + uint64_t flags = 0;
> + int client_mode = 0;
> + int reconnect = 0;
>
> RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);
>
> @@ -825,14 +832,37 @@ rte_pmd_vhost_devinit(const char *name, const char
> *params)
>
> if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
> ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
> - &open_queues, &queues);
> - if (ret < 0)
> + &open_int, &queues);
> + if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
> goto out_free;
>
> } else
> queues = 1;
>
> - eth_dev_vhost_create(name, iface_name, queues, rte_socket_id());
> + if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
> + ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
> + &open_int, &client_mode);
> + if (ret < 0)
> + goto out_free;
> + }
> + if (rte_kvargs_count(kvlist, ETH_VHOST_RECONNECT_ARG) == 1) {
> + ret = rte_kvargs_process(kvlist, ETH_VHOST_RECONNECT_ARG,
> + &open_int, &reconnect);
> + if (ret < 0)
> + goto out_free;
> + }
> + if (client_mode)
> + flags |= RTE_VHOST_USER_CLIENT;
> + if (reconnect)
> + flags |= RTE_VHOST_USER_RECONNECT;
> + if (reconnect && !client_mode) {
> + RTE_LOG(ERR, PMD,
> + "reconnect works only when client is specified\n");
> + ret = -1;
> + goto out_free;
> + }
> +
> + eth_dev_vhost_create(name, iface_name, queues, rte_socket_id(), flags);
>
> out_free:
> rte_kvargs_free(kvlist);
> --
> 1.9.0
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
2016-05-09 10:45 ` Victor Kaplansky
@ 2016-05-09 12:19 ` Michael S. Tsirkin
2016-05-09 16:25 ` Xie, Huawei
2016-05-10 8:21 ` Xie, Huawei
3 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-05-09 12:19 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei.xie
On Fri, May 06, 2016 at 11:40:22PM -0700, Yuanhan Liu wrote:
> When DPDK app crashes (or quits, or gets killed), and when QEMU supports
> reconnecting (patches have been sent, not merged yet), a restart of DPDK
> app would get stale vring base from QEMU. That would break the kernel
> virtio net completely, making it non-work any more, unless a driver reset
> is done.
>
> So, instead of getting the stale vring base from QEMU, Huawei suggested
> we could get a proper one from used->idx.
>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Suggested-by: "Xie, Huawei" <huawei.xie@intel.com>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
>
> Note that the right way to handle reconnect after an abnormal quit would be
> to let the guest driver initiate a reset when the reconnect is detected by
> QEMU. A reset from the virtio net driver resets all virtio related metadata
> and triggers the vhost-user re-initiation again, and therefore would make
> the reconnect work as expected.
>
> What's unfortunate is that a driver reset on reconnect, as the term "driver"
> implies, needs driver hacking, whether in the Linux kernel virtio net
> driver, the DPDK PMD driver, or even the Windows driver. Apparently, it will
> not work for now, or even for a long while, until guests have moved to such
> a new kernel.
>
> The fix (or more precisely, the workaround) from this patch would make
> reconnect work in most cases, including the following two hypothetical
> situations that might corrupt virtio memory:
>
> - vring_used_elem might be in a stale state when we are in the middle of
> updating the used vring. Say, we might have updated the "id" field, but
> left "len" untouched.
>
> - the vring desc buf might also be in a stale state, when we are copying
> data into it.
>
> The reason it still works is that we haven't updated used->idx yet,
> which means the guest driver will not start processing those buggy used
> entries. Therefore, no issues.
>
> However, Michael raised a concern, and he made a good point: a crash
> happening means some memory is corrupted, and it could be the virtio
> memory that is corrupted. In such a case, nothing will work without the
> reset.
>
> But I would say that an app in production use would rarely crash, and even
> if it crashes, the chance that the virtio memory is corrupted would be
> relatively small then. Besides that, it would work, say, when the app
> is killed by ctrl-c or the kill command. So, it works in most cases.
> But still, I would say it's just a workaround so far.
>
> On the other hand, without this patch, it would never work after an
> abnormal quit. But with this patch, it works in most cases, failing only
> in the rare case that the virtio memory is corrupted. Therefore, I'd say
> it is still good to have, until we have a perfect solution.
> ---
> lib/librte_vhost/virtio-net.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index c88aaa3..df103aa 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -560,6 +560,14 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
> return -1;
> }
>
> + if (vq->last_used_idx != vq->used->idx) {
> + RTE_LOG(WARNING, VHOST_CONFIG,
> + "last_used_idx (%u) and vq->used->idx (%u) mismatch\n",
> + vq->last_used_idx, vq->used->idx);
> + vq->last_used_idx = vq->used->idx;
> + vq->last_used_idx_res = vq->used->idx;
> + }
> +
> vq->log_guest_addr = addr->log_guest_addr;
>
> LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
Acked-by: Michael S. Tsirkin <mst@redhat.com>
And I would go further and just ignore the index coming from QEMU.
DPDK processes packets in order, so it does not need it.
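A sketch of what that would mean for the hunk above: drop the mismatch check
and always take the base from the ring itself (a suggestion only, not what the
patch currently does):

	/* Always trust used->idx rather than the vring base sent by QEMU. */
	vq->last_used_idx     = vq->used->idx;
	vq->last_used_idx_res = vq->used->idx;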
> --
> 1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-09 10:45 ` Victor Kaplansky
@ 2016-05-09 13:39 ` Xie, Huawei
2016-05-09 18:23 ` Yuanhan Liu
0 siblings, 1 reply; 50+ messages in thread
From: Xie, Huawei @ 2016-05-09 13:39 UTC (permalink / raw)
To: Victor Kaplansky, Yuanhan Liu; +Cc: dev, Michael S. Tsirkin
On 5/9/2016 6:45 PM, Victor Kaplansky wrote:
>> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
>> > index c88aaa3..df103aa 100644
>> > --- a/lib/librte_vhost/virtio-net.c
>> > +++ b/lib/librte_vhost/virtio-net.c
>> > @@ -560,6 +560,14 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr
>> > *addr)
>> > return -1;
>> > }
>> >
>> > + if (vq->last_used_idx != vq->used->idx) {
>> > + RTE_LOG(WARNING, VHOST_CONFIG,
>> > + "last_used_idx (%u) and vq->used->idx (%u) mismatch\n",
> I agree with this approach. I would just add to the log message that last_used_idx
> has been overridden by used_idx and some packets may be dropped.
For TX, the packets are resent.
For RX, the packets are dropped.
>
>> > + vq->last_used_idx, vq->used->idx);
>> > + vq->last_used_idx = vq->used->idx;
>> > + vq->last_used_idx_res = vq->used->idx;
>> > + }
>> > +
>> > vq->log_guest_addr = addr->log_guest_addr;
>> >
>> > LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
>> > --
>> > 1.9.0
>> >
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
2016-05-09 10:45 ` Victor Kaplansky
2016-05-09 12:19 ` Michael S. Tsirkin
@ 2016-05-09 16:25 ` Xie, Huawei
2016-05-09 18:22 ` Yuanhan Liu
2016-05-10 8:21 ` Xie, Huawei
3 siblings, 1 reply; 50+ messages in thread
From: Xie, Huawei @ 2016-05-09 16:25 UTC (permalink / raw)
To: Yuanhan Liu, dev; +Cc: Michael S. Tsirkin
On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> However, Michael raised a concern, and he made a good point: a crash
> happening means some memory is corrupted, and it could be the virtio
> memory that is corrupted. In such a case, nothing will work without the
> reset.
I don't get this point. What is the scenario?
For the crash of the virtio frontend driver, I remember we discussed it before:
we have no good recipe, only some workarounds. The user space frontend
driver crashes, and its memory is reallocated to other instances, but
vhost is still writing to that memory. However, this has nothing to do
with vhost reconnect.
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-07 6:40 ` [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability Yuanhan Liu
@ 2016-05-09 16:47 ` Xie, Huawei
2016-05-09 18:12 ` Yuanhan Liu
0 siblings, 1 reply; 50+ messages in thread
From: Xie, Huawei @ 2016-05-09 16:47 UTC (permalink / raw)
To: Yuanhan Liu, dev
On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> +static void *
> +vhost_user_client_reconnect(void *arg)
> +{
> + struct reconnect_info *reconn = arg;
> + int ret;
> +
> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> + while (1) {
> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
> + sizeof(reconn->un));
> + if (ret == 0)
> + break;
> + sleep(1);
> + }
> +
> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> + free(reconn);
> +
> + return NULL;
> +}
> +
We could create hundreds of vhost-user ports in OVS. Without connections
with QEMU established, those ports are just inactive. This works fine in
server mode.
With client mode, do we need to create hundreds of vhost threads? This
would be too interruptible.
How about we create only one thread and do the reconnections for all the
unconnected sockets?
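A rough sketch of that single-thread approach, where every pending reconnect
sits on one list that a single background thread walks periodically; the
struct and list names below are illustrative, not taken from this patch:

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;
	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq, vhost_user_reconnect);
static struct vhost_user_reconnect_tailq reconn_list =
	TAILQ_HEAD_INITIALIZER(reconn_list);
static pthread_mutex_t reconn_mutex = PTHREAD_MUTEX_INITIALIZER;

/* One thread retries all pending sockets; no per-socket thread is needed. */
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_mutex);
		for (reconn = TAILQ_FIRST(&reconn_list);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
				    sizeof(reconn->un)) < 0)
				continue;

			vhost_user_add_connection(reconn->fd, reconn->vsocket);
			TAILQ_REMOVE(&reconn_list, reconn, next);
			free(reconn);
		}
		pthread_mutex_unlock(&reconn_mutex);
		sleep(1);
	}

	return NULL;
}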
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-09 16:47 ` Xie, Huawei
@ 2016-05-09 18:12 ` Yuanhan Liu
2016-05-10 7:24 ` Xie, Huawei
0 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-09 18:12 UTC (permalink / raw)
To: Xie, Huawei; +Cc: dev
On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> > +static void *
> > +vhost_user_client_reconnect(void *arg)
> > +{
> > + struct reconnect_info *reconn = arg;
> > + int ret;
> > +
> > + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> > + while (1) {
> > + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
> > + sizeof(reconn->un));
> > + if (ret == 0)
> > + break;
> > + sleep(1);
> > + }
> > +
> > + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> > + free(reconn);
> > +
> > + return NULL;
> > +}
> > +
>
> We could create hundreds of vhost-user ports in OVS. Without connections
> with QEMU established, those ports are just inactive. This works fine in
> server mode.
> With client mode, do we need to create hundreds of vhost threads? This
> would be too interruptible.
> How about we create only one thread and do the reconnections for all the
> unconnected sockets?
Yes, good point and good suggestion. Will do it in v2.
Thanks.
--yliu
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-09 16:25 ` Xie, Huawei
@ 2016-05-09 18:22 ` Yuanhan Liu
2016-06-13 20:47 ` Michael S. Tsirkin
0 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-09 18:22 UTC (permalink / raw)
To: Xie, Huawei; +Cc: dev, Michael S. Tsirkin
On Mon, May 09, 2016 at 04:25:38PM +0000, Xie, Huawei wrote:
> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> > However, Michael raised a concern, and he made a good point: a crash
> > happening means some memory is corrupted, and it could be the virtio
> > memory that is corrupted. In such a case, nothing will work without the
> > reset.
>
> I don't get this point. What is the scenario?
It's not a specific scenario, just a hypothetical one.
> For the crash of the virtio frontend driver, I remember we discussed it before:
> we have no good recipe, only some workarounds. The user space frontend
> driver crashes, and its memory is reallocated to other instances, but
> vhost is still writing to that memory. However, this has nothing to do
> with vhost reconnect.
Hmm, yes, that seems like another good point to me. This patch seems like
a fix rather than a workaround then :)
--yliu
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-09 13:39 ` Xie, Huawei
@ 2016-05-09 18:23 ` Yuanhan Liu
0 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-09 18:23 UTC (permalink / raw)
To: Xie, Huawei; +Cc: Victor Kaplansky, dev, Michael S. Tsirkin
On Mon, May 09, 2016 at 01:39:25PM +0000, Xie, Huawei wrote:
> On 5/9/2016 6:45 PM, Victor Kaplansky wrote:
> >> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> >> > index c88aaa3..df103aa 100644
> >> > --- a/lib/librte_vhost/virtio-net.c
> >> > +++ b/lib/librte_vhost/virtio-net.c
> >> > @@ -560,6 +560,14 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr
> >> > *addr)
> >> > return -1;
> >> > }
> >> >
> >> > + if (vq->last_used_idx != vq->used->idx) {
> >> > + RTE_LOG(WARNING, VHOST_CONFIG,
> >> > + "last_used_idx (%u) and vq->used->idx (%u) mismatch\n",
> > I agree with this approach. I would just add to the log message that last_used_idx
> > has been overridden by used_idx and some packets may be dropped.
Good suggestion. Will do in v2.
> For TX, the packets are resent.
> For RX, the packets are dropped.
Yes.
Thanks.
--yliu
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 6/6] vhost: add pmd client and reconnect option
2016-05-09 10:54 ` Victor Kaplansky
@ 2016-05-09 18:26 ` Yuanhan Liu
0 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-09 18:26 UTC (permalink / raw)
To: Victor Kaplansky; +Cc: dev, huawei xie, Tetsuya Mukawa
On Mon, May 09, 2016 at 06:54:04AM -0400, Victor Kaplansky wrote:
> Looks OK to me. I didn't quite get why open_int() is named so.
> What does it open?
Thanks for review.
It's a callback for key-value arg processing. It turns the value
string into a number.
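For reference, here is a minimal sketch of how such a kvargs callback gets
wired up; the "client" key and the helper below are only for illustration:

#include <errno.h>
#include <stdlib.h>
#include <rte_common.h>
#include <rte_kvargs.h>

/* Invoked once per matching key; converts the value string to a number. */
static int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	int *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (int)strtol(value, NULL, 0);
	return 0;
}

static int
parse_client_arg(const char *params, int *client_mode)
{
	/* e.g. params = "iface=/tmp/sock0,client=1" */
	struct rte_kvargs *kvlist = rte_kvargs_parse(params, NULL);
	int ret = 0;

	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, "client") == 1)
		ret = rte_kvargs_process(kvlist, "client",
					 &open_int, client_mode);

	rte_kvargs_free(kvlist);
	return ret;
}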
--yliu
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
2016-05-09 20:33 ` Yuanhan Liu
@ 2016-05-09 20:30 ` Michael S. Tsirkin
0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-05-09 20:30 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: Victor Kaplansky, dev, huawei xie
On Mon, May 09, 2016 at 01:33:08PM -0700, Yuanhan Liu wrote:
> On Mon, May 09, 2016 at 06:33:45AM -0400, Victor Kaplansky wrote:
> > Adding a flag for a future extension seems fine to me.
> > Could we manage without adding a flag?
> > For example, could we always try a client mode for a while at first,
> > and if unsuccessful, then come up in server mode?
>
> It's hard to define how long "a while" is. And I don't think
> there is a way to switch back from server mode to client mode
> if you do so.
>
> So, assume you create a vhost-user port (as client), and you
> start QEMU (as server) later: after the gap you mentioned, once it has
> auto-turned into a server, it simply doesn't work then.
>
> --yliu
I think I agree - I don't know of other software doing
such an automatic switch. Correctly setting server/client mode
seems easy enough.
> > ----- Original Message -----
> > > From: "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> > > To: dev@dpdk.org
> > > Cc: "huawei xie" <huawei.xie@intel.com>, "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> > > Sent: Saturday, May 7, 2016 9:40:20 AM
> > > Subject: [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
> > >
> > > Add a new parameter (flags) to rte_vhost_driver_register(). DPDK
> > > vhost-user acts as client mode when RTE_VHOST_USER_CLIENT flag
> > > is set.
> > >
> > > The flags would also allow future extensions without breaking the
> > > API (again).
> > >
> > > The rest is straightforward then: allocate a unix socket, and
> > > bind/listen for server, connect for client.
> > >
> > > Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > > ---
> > > drivers/net/vhost/rte_eth_vhost.c | 2 +-
> > > examples/vhost/main.c | 2 +-
> > > lib/librte_vhost/rte_virtio_net.h | 11 +-
> > > lib/librte_vhost/vhost_user/vhost-net-user.c | 215
> > > ++++++++++++++++-----------
> > > 4 files changed, 142 insertions(+), 88 deletions(-)
> > >
> > > diff --git a/drivers/net/vhost/rte_eth_vhost.c
> > > b/drivers/net/vhost/rte_eth_vhost.c
> > > index a9dada5..36697cf 100644
> > > --- a/drivers/net/vhost/rte_eth_vhost.c
> > > +++ b/drivers/net/vhost/rte_eth_vhost.c
> > > @@ -456,7 +456,7 @@ eth_dev_start(struct rte_eth_dev *dev)
> > > int ret = 0;
> > >
> > > if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
> > > - ret = rte_vhost_driver_register(internal->iface_name);
> > > + ret = rte_vhost_driver_register(internal->iface_name, 0);
> > > if (ret)
> > > return ret;
> > > }
> > > diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> > > index bbf0d28..6899189 100644
> > > --- a/examples/vhost/main.c
> > > +++ b/examples/vhost/main.c
> > > @@ -1499,7 +1499,7 @@ main(int argc, char *argv[])
> > > rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
> > >
> > > /* Register vhost(cuse or user) driver to handle vhost messages. */
> > > - ret = rte_vhost_driver_register((char *)&dev_basename);
> > > + ret = rte_vhost_driver_register(dev_basename, 0);
> > > if (ret != 0)
> > > rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
> > >
> > > diff --git a/lib/librte_vhost/rte_virtio_net.h
> > > b/lib/librte_vhost/rte_virtio_net.h
> > > index 4e50425..c84e7ab 100644
> > > --- a/lib/librte_vhost/rte_virtio_net.h
> > > +++ b/lib/librte_vhost/rte_virtio_net.h
> > > @@ -51,6 +51,8 @@
> > > #include <rte_mempool.h>
> > > #include <rte_ether.h>
> > >
> > > +#define RTE_VHOST_USER_CLIENT (1ULL << 0)
> > > +
> > > struct rte_mbuf;
> > >
> > > #define VHOST_MEMORY_MAX_NREGIONS 8
> > > @@ -96,11 +98,14 @@ uint64_t rte_vhost_feature_get(void);
> > >
> > > int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int
> > > enable);
> > >
> > > -/* Register vhost driver. dev_name could be different for multiple instance
> > > support. */
> > > -int rte_vhost_driver_register(const char *dev_name);
> > > +/**
> > > + * Register vhost driver. path could be different for multiple
> > > + * instance support.
> > > + */
> > > +int rte_vhost_driver_register(const char *path, uint64_t flags);
> > >
> > > /* Unregister vhost driver. This is only meaningful to vhost user. */
> > > -int rte_vhost_driver_unregister(const char *dev_name);
> > > +int rte_vhost_driver_unregister(const char *path);
> > >
> > > /* Register callbacks. */
> > > int rte_vhost_driver_callback_register(struct virtio_net_device_ops const *
> > > const);
> > > diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > > b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > > index f485a3b..aa98717 100644
> > > --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > > @@ -58,6 +58,7 @@
> > > struct vhost_user_socket {
> > > char *path;
> > > int listenfd;
> > > + int is_server;
> > > };
> > >
> > > struct vhost_user_connection {
> > > @@ -75,7 +76,7 @@ struct vhost_user {
> > >
> > > #define MAX_VIRTIO_BACKLOG 128
> > >
> > > -static void vhost_user_new_connection(int fd, void *data, int *remove);
> > > +static void vhost_user_server_new_connection(int fd, void *data, int
> > > *remove);
> > > static void vhost_user_msg_handler(int fd, void *dat, int *remove);
> > >
> > > static struct vhost_user vhost_user = {
> > > @@ -111,48 +112,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
> > > [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
> > > };
> > >
> > > -/**
> > > - * Create a unix domain socket, bind to path and listen for connection.
> > > - * @return
> > > - * socket fd or -1 on failure
> > > - */
> > > -static int
> > > -uds_socket(const char *path)
> > > -{
> > > - struct sockaddr_un un;
> > > - int sockfd;
> > > - int ret;
> > > -
> > > - if (path == NULL)
> > > - return -1;
> > > -
> > > - sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> > > - if (sockfd < 0)
> > > - return -1;
> > > - RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> > > -
> > > - memset(&un, 0, sizeof(un));
> > > - un.sun_family = AF_UNIX;
> > > - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> > > - ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> > > - if (ret == -1) {
> > > - RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try
> > > again.\n",
> > > - sockfd, path);
> > > - goto err;
> > > - }
> > > - RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> > > -
> > > - ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
> > > - if (ret == -1)
> > > - goto err;
> > > -
> > > - return sockfd;
> > > -
> > > -err:
> > > - close(sockfd);
> > > - return -1;
> > > -}
> > > -
> > > /* return bytes# of read on success or negative val on failure. */
> > > static int
> > > read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> > > @@ -287,32 +246,24 @@ send_vhost_message(int sockfd, struct VhostUserMsg
> > > *msg)
> > > return ret;
> > > }
> > >
> > > -/* call back when there is new vhost-user connection. */
> > > +
> > > static void
> > > -vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
> > > +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
> > > {
> > > - struct vhost_user_socket *vsocket = dat;
> > > - int conn_fd;
> > > - struct vhost_user_connection *conn;
> > > int vid;
> > > - unsigned int size;
> > > -
> > > - conn_fd = accept(fd, NULL, NULL);
> > > - RTE_LOG(INFO, VHOST_CONFIG,
> > > - "new virtio connection is %d\n", conn_fd);
> > > - if (conn_fd < 0)
> > > - return;
> > > + size_t size;
> > > + struct vhost_user_connection *conn;
> > >
> > > - conn = calloc(1, sizeof(*conn));
> > > + conn = malloc(sizeof(*conn));
> > > if (conn == NULL) {
> > > - close(conn_fd);
> > > + close(fd);
> > > return;
> > > }
> > >
> > > vid = vhost_new_device();
> > > if (vid == -1) {
> > > + close(fd);
> > > free(conn);
> > > - close(conn_fd);
> > > return;
> > > }
> > >
> > > @@ -323,8 +274,21 @@ vhost_user_new_connection(int fd, void *dat, int *remove
> > > __rte_unused)
> > >
> > > conn->vsocket = vsocket;
> > > conn->vid = vid;
> > > - fdset_add(&vhost_user.fdset,
> > > - conn_fd, vhost_user_msg_handler, NULL, conn);
> > > + fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
> > > +}
> > > +
> > > +/* call back when there is new vhost-user connection from client */
> > > +static void
> > > +vhost_user_server_new_connection(int fd, void *dat, int *remove
> > > __rte_unused)
> > > +{
> > > + struct vhost_user_socket *vsocket = dat;
> > > +
> > > + fd = accept(fd, NULL, NULL);
> > > + if (fd < 0)
> > > + return;
> > > +
> > > + RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
> > > + vhost_user_add_connection(fd, vsocket);
> > > }
> > >
> > > /* callback when there is message on the connfd */
> > > @@ -452,50 +416,135 @@ vhost_user_msg_handler(int connfd, void *dat, int
> > > *remove)
> > > }
> > > }
> > >
> > > -/**
> > > - * Creates and initialise the vhost server.
> > > - */
> > > -int
> > > -rte_vhost_driver_register(const char *path)
> > > +static int
> > > +create_unix_socket(const char *path, struct sockaddr_un *un, int is_server)
> > > {
> > > - struct vhost_user_socket *vsocket;
> > > + int fd;
> > >
> > > - pthread_mutex_lock(&vhost_user.mutex);
> > > + fd = socket(AF_UNIX, SOCK_STREAM, 0);
> > > + if (fd < 0)
> > > + return -1;
> > > + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
> > > + is_server ? "server" : "client", fd);
> > >
> > > - if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
> > > - RTE_LOG(ERR, VHOST_CONFIG,
> > > - "error: the number of servers reaches maximum\n");
> > > - pthread_mutex_unlock(&vhost_user.mutex);
> > > + memset(un, 0, sizeof(*un));
> > > + un->sun_family = AF_UNIX;
> > > + strncpy(un->sun_path, path, sizeof(un->sun_path));
> > > +
> > > + return fd;
> > > +}
> > > +
> > > +static int
> > > +vhost_user_create_server(struct vhost_user_socket *vsocket)
> > > +{
> > > + int fd;
> > > + int ret;
> > > + struct sockaddr_un un;
> > > + const char *path = vsocket->path;
> > > +
> > > + fd = create_unix_socket(path, &un, vsocket->is_server);
> > > + if (fd < 0)
> > > return -1;
> > > +
> > > + ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
> > > + if (ret < 0) {
> > > + RTE_LOG(ERR, VHOST_CONFIG,
> > > + "failed to bind to %s: %s; remove it and try again\n",
> > > + path, strerror(errno));
> > > + goto err;
> > > }
> > > + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> > >
> > > - vsocket = calloc(sizeof(struct vhost_user_socket), 1);
> > > - if (vsocket == NULL) {
> > > - pthread_mutex_unlock(&vhost_user.mutex);
> > > + ret = listen(fd, MAX_VIRTIO_BACKLOG);
> > > + if (ret < 0)
> > > + goto err;
> > > +
> > > + vsocket->listenfd = fd;
> > > + fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
> > > + NULL, vsocket);
> > > +
> > > + return 0;
> > > +
> > > +err:
> > > + close(fd);
> > > + return -1;
> > > +}
> > > +
> > > +static int
> > > +vhost_user_create_client(struct vhost_user_socket *vsocket)
> > > +{
> > > + int fd;
> > > + int ret;
> > > + struct sockaddr_un un;
> > > + const char *path = vsocket->path;
> > > +
> > > + fd = create_unix_socket(path, &un, vsocket->is_server);
> > > + if (fd < 0)
> > > + return -1;
> > > +
> > > + ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
> > > + if (ret < 0) {
> > > + RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
> > > + path, strerror(errno));
> > > + close(fd);
> > > return -1;
> > > }
> > >
> > > - vsocket->listenfd = uds_socket(path);
> > > - if (vsocket->listenfd < 0) {
> > > - free(vsocket);
> > > - pthread_mutex_unlock(&vhost_user.mutex);
> > > + vhost_user_add_connection(fd, vsocket);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +/*
> > > + * Register a new vhost-user socket; here we could act as server
> > > + * (the default case), or client (when the RTE_VHOST_USER_CLIENT flag
> > > + * is set).
> > > + */
> > > +int
> > > +rte_vhost_driver_register(const char *path, uint64_t flags)
> > > +{
> > > + int ret = -1;
> > > + struct vhost_user_socket *vsocket;
> > > +
> > > + if (!path)
> > > return -1;
> > > +
> > > + pthread_mutex_lock(&vhost_user.mutex);
> > > +
> > > + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
> > > + RTE_LOG(ERR, VHOST_CONFIG,
> > > + "error: the number of vhost sockets reaches maximum\n");
> > > + goto out;
> > > }
> > >
> > > + vsocket = malloc(sizeof(struct vhost_user_socket));
> > > + if (!vsocket)
> > > + goto out;
> > > + memset(vsocket, 0, sizeof(struct vhost_user_socket));
> > > vsocket->path = strdup(path);
> > >
> > > - fdset_add(&vhost_user.fdset, vsocket->listenfd,
> > > - vhost_user_new_connection, NULL, vsocket);
> > > + if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
> > > + ret = vhost_user_create_client(vsocket);
> > > + } else {
> > > + vsocket->is_server = 1;
> > > + ret = vhost_user_create_server(vsocket);
> > > + }
> > > + if (ret < 0) {
> > > + free(vsocket->path);
> > > + free(vsocket);
> > > + goto out;
> > > + }
> > >
> > > vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
> > > +
> > > +out:
> > > pthread_mutex_unlock(&vhost_user.mutex);
> > >
> > > - return 0;
> > > + return ret;
> > > }
> > >
> > > -
> > > /**
> > > - * Unregister the specified vhost server
> > > + * Unregister the specified vhost socket
> > > */
> > > int
> > > rte_vhost_driver_unregister(const char *path)
> > > --
> > > 1.9.0
> > >
> > >
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
2016-05-09 10:33 ` Victor Kaplansky
@ 2016-05-09 20:33 ` Yuanhan Liu
2016-05-09 20:30 ` Michael S. Tsirkin
0 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-09 20:33 UTC (permalink / raw)
To: Victor Kaplansky; +Cc: dev, huawei xie, Michael S. Tsirkin
On Mon, May 09, 2016 at 06:33:45AM -0400, Victor Kaplansky wrote:
> Adding a flag for a future extension seems fine to me.
> Could we manage without adding a flag?
> For example, could we always try a client mode for a while at first,
> and if unsuccessful, then come up in server mode?
It's hard to define how long "a while" is. And I don't think
there is a way to switch back from server mode to client mode
if you do so.
So, assume you create a vhost-user port (as client), and you
start QEMU (as server) later: after the gap you mentioned, once it has
auto-turned into a server, it simply doesn't work then.
--yliu
> ----- Original Message -----
> > From: "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> > To: dev@dpdk.org
> > Cc: "huawei xie" <huawei.xie@intel.com>, "Yuanhan Liu" <yuanhan.liu@linux.intel.com>
> > Sent: Saturday, May 7, 2016 9:40:20 AM
> > Subject: [dpdk-dev] [PATCH 2/6] vhost: add vhost-user client mode
> >
> > Add a new parameter (flags) to rte_vhost_driver_register(). DPDK
> > vhost-user acts as client mode when RTE_VHOST_USER_CLIENT flag
> > is set.
> >
> > The flags would also allow future extensions without breaking the
> > API (again).
> >
> > The rest is straightforward then: allocate a unix socket, and
> > bind/listen for server, connect for client.
> >
> > Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > ---
> > drivers/net/vhost/rte_eth_vhost.c | 2 +-
> > examples/vhost/main.c | 2 +-
> > lib/librte_vhost/rte_virtio_net.h | 11 +-
> > lib/librte_vhost/vhost_user/vhost-net-user.c | 215
> > ++++++++++++++++-----------
> > 4 files changed, 142 insertions(+), 88 deletions(-)
> >
> > diff --git a/drivers/net/vhost/rte_eth_vhost.c
> > b/drivers/net/vhost/rte_eth_vhost.c
> > index a9dada5..36697cf 100644
> > --- a/drivers/net/vhost/rte_eth_vhost.c
> > +++ b/drivers/net/vhost/rte_eth_vhost.c
> > @@ -456,7 +456,7 @@ eth_dev_start(struct rte_eth_dev *dev)
> > int ret = 0;
> >
> > if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
> > - ret = rte_vhost_driver_register(internal->iface_name);
> > + ret = rte_vhost_driver_register(internal->iface_name, 0);
> > if (ret)
> > return ret;
> > }
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> > index bbf0d28..6899189 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -1499,7 +1499,7 @@ main(int argc, char *argv[])
> > rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
> >
> > /* Register vhost(cuse or user) driver to handle vhost messages. */
> > - ret = rte_vhost_driver_register((char *)&dev_basename);
> > + ret = rte_vhost_driver_register(dev_basename, 0);
> > if (ret != 0)
> > rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
> >
> > diff --git a/lib/librte_vhost/rte_virtio_net.h
> > b/lib/librte_vhost/rte_virtio_net.h
> > index 4e50425..c84e7ab 100644
> > --- a/lib/librte_vhost/rte_virtio_net.h
> > +++ b/lib/librte_vhost/rte_virtio_net.h
> > @@ -51,6 +51,8 @@
> > #include <rte_mempool.h>
> > #include <rte_ether.h>
> >
> > +#define RTE_VHOST_USER_CLIENT (1ULL << 0)
> > +
> > struct rte_mbuf;
> >
> > #define VHOST_MEMORY_MAX_NREGIONS 8
> > @@ -96,11 +98,14 @@ uint64_t rte_vhost_feature_get(void);
> >
> > int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int
> > enable);
> >
> > -/* Register vhost driver. dev_name could be different for multiple instance
> > support. */
> > -int rte_vhost_driver_register(const char *dev_name);
> > +/**
> > + * Register vhost driver. path could be different for multiple
> > + * instance support.
> > + */
> > +int rte_vhost_driver_register(const char *path, uint64_t flags);
> >
> > /* Unregister vhost driver. This is only meaningful to vhost user. */
> > -int rte_vhost_driver_unregister(const char *dev_name);
> > +int rte_vhost_driver_unregister(const char *path);
> >
> > /* Register callbacks. */
> > int rte_vhost_driver_callback_register(struct virtio_net_device_ops const *
> > const);
> > diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > index f485a3b..aa98717 100644
> > --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > @@ -58,6 +58,7 @@
> > struct vhost_user_socket {
> > char *path;
> > int listenfd;
> > + int is_server;
> > };
> >
> > struct vhost_user_connection {
> > @@ -75,7 +76,7 @@ struct vhost_user {
> >
> > #define MAX_VIRTIO_BACKLOG 128
> >
> > -static void vhost_user_new_connection(int fd, void *data, int *remove);
> > +static void vhost_user_server_new_connection(int fd, void *data, int
> > *remove);
> > static void vhost_user_msg_handler(int fd, void *dat, int *remove);
> >
> > static struct vhost_user vhost_user = {
> > @@ -111,48 +112,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
> > [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
> > };
> >
> > -/**
> > - * Create a unix domain socket, bind to path and listen for connection.
> > - * @return
> > - * socket fd or -1 on failure
> > - */
> > -static int
> > -uds_socket(const char *path)
> > -{
> > - struct sockaddr_un un;
> > - int sockfd;
> > - int ret;
> > -
> > - if (path == NULL)
> > - return -1;
> > -
> > - sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> > - if (sockfd < 0)
> > - return -1;
> > - RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> > -
> > - memset(&un, 0, sizeof(un));
> > - un.sun_family = AF_UNIX;
> > - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> > - ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> > - if (ret == -1) {
> > - RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try
> > again.\n",
> > - sockfd, path);
> > - goto err;
> > - }
> > - RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> > -
> > - ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
> > - if (ret == -1)
> > - goto err;
> > -
> > - return sockfd;
> > -
> > -err:
> > - close(sockfd);
> > - return -1;
> > -}
> > -
> > /* return bytes# of read on success or negative val on failure. */
> > static int
> > read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> > @@ -287,32 +246,24 @@ send_vhost_message(int sockfd, struct VhostUserMsg
> > *msg)
> > return ret;
> > }
> >
> > -/* call back when there is new vhost-user connection. */
> > +
> > static void
> > -vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
> > +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
> > {
> > - struct vhost_user_socket *vsocket = dat;
> > - int conn_fd;
> > - struct vhost_user_connection *conn;
> > int vid;
> > - unsigned int size;
> > -
> > - conn_fd = accept(fd, NULL, NULL);
> > - RTE_LOG(INFO, VHOST_CONFIG,
> > - "new virtio connection is %d\n", conn_fd);
> > - if (conn_fd < 0)
> > - return;
> > + size_t size;
> > + struct vhost_user_connection *conn;
> >
> > - conn = calloc(1, sizeof(*conn));
> > + conn = malloc(sizeof(*conn));
> > if (conn == NULL) {
> > - close(conn_fd);
> > + close(fd);
> > return;
> > }
> >
> > vid = vhost_new_device();
> > if (vid == -1) {
> > + close(fd);
> > free(conn);
> > - close(conn_fd);
> > return;
> > }
> >
> > @@ -323,8 +274,21 @@ vhost_user_new_connection(int fd, void *dat, int *remove
> > __rte_unused)
> >
> > conn->vsocket = vsocket;
> > conn->vid = vid;
> > - fdset_add(&vhost_user.fdset,
> > - conn_fd, vhost_user_msg_handler, NULL, conn);
> > + fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
> > +}
> > +
> > +/* call back when there is new vhost-user connection from client */
> > +static void
> > +vhost_user_server_new_connection(int fd, void *dat, int *remove
> > __rte_unused)
> > +{
> > + struct vhost_user_socket *vsocket = dat;
> > +
> > + fd = accept(fd, NULL, NULL);
> > + if (fd < 0)
> > + return;
> > +
> > + RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
> > + vhost_user_add_connection(fd, vsocket);
> > }
> >
> > /* callback when there is message on the connfd */
> > @@ -452,50 +416,135 @@ vhost_user_msg_handler(int connfd, void *dat, int
> > *remove)
> > }
> > }
> >
> > -/**
> > - * Creates and initialise the vhost server.
> > - */
> > -int
> > -rte_vhost_driver_register(const char *path)
> > +static int
> > +create_unix_socket(const char *path, struct sockaddr_un *un, int is_server)
> > {
> > - struct vhost_user_socket *vsocket;
> > + int fd;
> >
> > - pthread_mutex_lock(&vhost_user.mutex);
> > + fd = socket(AF_UNIX, SOCK_STREAM, 0);
> > + if (fd < 0)
> > + return -1;
> > + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
> > + is_server ? "server" : "client", fd);
> >
> > - if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
> > - RTE_LOG(ERR, VHOST_CONFIG,
> > - "error: the number of servers reaches maximum\n");
> > - pthread_mutex_unlock(&vhost_user.mutex);
> > + memset(un, 0, sizeof(*un));
> > + un->sun_family = AF_UNIX;
> > + strncpy(un->sun_path, path, sizeof(un->sun_path));
> > +
> > + return fd;
> > +}
> > +
> > +static int
> > +vhost_user_create_server(struct vhost_user_socket *vsocket)
> > +{
> > + int fd;
> > + int ret;
> > + struct sockaddr_un un;
> > + const char *path = vsocket->path;
> > +
> > + fd = create_unix_socket(path, &un, vsocket->is_server);
> > + if (fd < 0)
> > return -1;
> > +
> > + ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
> > + if (ret < 0) {
> > + RTE_LOG(ERR, VHOST_CONFIG,
> > + "failed to bind to %s: %s; remove it and try again\n",
> > + path, strerror(errno));
> > + goto err;
> > }
> > + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> >
> > - vsocket = calloc(sizeof(struct vhost_user_socket), 1);
> > - if (vsocket == NULL) {
> > - pthread_mutex_unlock(&vhost_user.mutex);
> > + ret = listen(fd, MAX_VIRTIO_BACKLOG);
> > + if (ret < 0)
> > + goto err;
> > +
> > + vsocket->listenfd = fd;
> > + fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
> > + NULL, vsocket);
> > +
> > + return 0;
> > +
> > +err:
> > + close(fd);
> > + return -1;
> > +}
> > +
> > +static int
> > +vhost_user_create_client(struct vhost_user_socket *vsocket)
> > +{
> > + int fd;
> > + int ret;
> > + struct sockaddr_un un;
> > + const char *path = vsocket->path;
> > +
> > + fd = create_unix_socket(path, &un, vsocket->is_server);
> > + if (fd < 0)
> > + return -1;
> > +
> > + ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
> > + if (ret < 0) {
> > + RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
> > + path, strerror(errno));
> > + close(fd);
> > return -1;
> > }
> >
> > - vsocket->listenfd = uds_socket(path);
> > - if (vsocket->listenfd < 0) {
> > - free(vsocket);
> > - pthread_mutex_unlock(&vhost_user.mutex);
> > + vhost_user_add_connection(fd, vsocket);
> > +
> > + return 0;
> > +}
> > +
> > +/*
> > + * Register a new vhost-user socket; here we could act as server
> > + * (the default case), or client (when the RTE_VHOST_USER_CLIENT flag
> > + * is set).
> > + */
> > +int
> > +rte_vhost_driver_register(const char *path, uint64_t flags)
> > +{
> > + int ret = -1;
> > + struct vhost_user_socket *vsocket;
> > +
> > + if (!path)
> > return -1;
> > +
> > + pthread_mutex_lock(&vhost_user.mutex);
> > +
> > + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
> > + RTE_LOG(ERR, VHOST_CONFIG,
> > + "error: the number of vhost sockets reaches maximum\n");
> > + goto out;
> > }
> >
> > + vsocket = malloc(sizeof(struct vhost_user_socket));
> > + if (!vsocket)
> > + goto out;
> > + memset(vsocket, 0, sizeof(struct vhost_user_socket));
> > vsocket->path = strdup(path);
> >
> > - fdset_add(&vhost_user.fdset, vsocket->listenfd,
> > - vhost_user_new_connection, NULL, vsocket);
> > + if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
> > + ret = vhost_user_create_client(vsocket);
> > + } else {
> > + vsocket->is_server = 1;
> > + ret = vhost_user_create_server(vsocket);
> > + }
> > + if (ret < 0) {
> > + free(vsocket->path);
> > + free(vsocket);
> > + goto out;
> > + }
> >
> > vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
> > +
> > +out:
> > pthread_mutex_unlock(&vhost_user.mutex);
> >
> > - return 0;
> > + return ret;
> > }
> >
> > -
> > /**
> > - * Unregister the specified vhost server
> > + * Unregister the specified vhost socket
> > */
> > int
> > rte_vhost_driver_unregister(const char *path)
> > --
> > 1.9.0
> >
> >
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (5 preceding siblings ...)
2016-05-07 6:40 ` [dpdk-dev] [PATCH 6/6] vhost: add pmd " Yuanhan Liu
@ 2016-05-10 3:23 ` Xu, Qian Q
2016-05-10 17:41 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
7 siblings, 1 reply; 50+ messages in thread
From: Xu, Qian Q @ 2016-05-10 3:23 UTC (permalink / raw)
To: Yuanhan Liu, dev; +Cc: Xie, Huawei
Do we need to patch QEMU for the reconnect case?
Thanks
Qian
-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
Sent: Saturday, May 07, 2016 2:40 PM
To: dev@dpdk.org
Cc: Xie, Huawei; Yuanhan Liu
Subject: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
Both the vhost-user backend (DPDK here) and frontend (QEMU) could be server, as well as client. DPDK just acts as server so far. This patch set would make it possible to act as both.
A new arg (flags) is introduced for API rte_vhost_driver_register(). And the client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this implies an API breakage. However, since this release deals with ABI/API refactoring, it should not be an issue.
With the DPDK as client, it's easier to implement the "reconnect" ability, which means we could still make vhost-user work after DPDK restarts.
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client and reconnect option
vhost: add pmd client and reconnect option
drivers/net/vhost/rte_eth_vhost.c | 54 +++-
examples/vhost/main.c | 23 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 355 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 8 +
6 files changed, 313 insertions(+), 145 deletions(-)
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-09 18:12 ` Yuanhan Liu
@ 2016-05-10 7:24 ` Xie, Huawei
2016-05-10 7:54 ` Michael S. Tsirkin
0 siblings, 1 reply; 50+ messages in thread
From: Xie, Huawei @ 2016-05-10 7:24 UTC (permalink / raw)
To: Yuanhan Liu, Michael S. Tsirkin, Loftus, Ciara; +Cc: dev
On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
>>> +static void *
>>> +vhost_user_client_reconnect(void *arg)
>>> +{
>>> + struct reconnect_info *reconn = arg;
>>> + int ret;
>>> +
>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
>>> + while (1) {
>>> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
>>> + sizeof(reconn->un));
>>> + if (ret == 0)
>>> + break;
>>> + sleep(1);
>>> + }
>>> +
>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
>>> + free(reconn);
>>> +
>>> + return NULL;
>>> +}
>>> +
>> We could create hundreds of vhost-user ports in OVS. Without connections
>> with QEMU established, those ports are just inactive. This works fine in
>> server mode.
>> With client modes, do we need to create hundreds of vhost threads? This
>> would be too interruptible.
>> How about we create only one thread and do the reconnections for all the
>> unconnected socket?
> Yes, good point and good suggestion. Will do it in v2.
Hi Michael:
This reminds me of another, unrelated issue.
In OVS, currently for each vhost port, we create a unix domain socket,
and QEMU vhost proxy connects to this socket, and we use this to
identify the connection. This works fine but is our workaround,
otherwise we have no way to identify the connection.
Do you think this is an issue?
Do we have a plan to support identification in VHOST_USER_MESSAGE? With
the identification, if vhost acts as server, we only need to create one
socket which receives multiple connections, and use the ID in the
message to identify the connection.
/huawei
>
> Thanks.
>
> --yliu
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 7:24 ` Xie, Huawei
@ 2016-05-10 7:54 ` Michael S. Tsirkin
2016-05-10 8:07 ` Xie, Huawei
0 siblings, 1 reply; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-05-10 7:54 UTC (permalink / raw)
To: Xie, Huawei; +Cc: Yuanhan Liu, Loftus, Ciara, dev
On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
> > On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
> >> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> >>> +static void *
> >>> +vhost_user_client_reconnect(void *arg)
> >>> +{
> >>> + struct reconnect_info *reconn = arg;
> >>> + int ret;
> >>> +
> >>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> >>> + while (1) {
> >>> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
> >>> + sizeof(reconn->un));
> >>> + if (ret == 0)
> >>> + break;
> >>> + sleep(1);
> >>> + }
> >>> +
> >>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> >>> + free(reconn);
> >>> +
> >>> + return NULL;
> >>> +}
> >>> +
> >> We could create hundreds of vhost-user ports in OVS. Without connections
> >> with QEMU established, those ports are just inactive. This works fine in
> >> server mode.
> >> With client modes, do we need to create hundreds of vhost threads? This
> >> would be too interruptible.
> >> How about we create only one thread and do the reconnections for all the
> >> unconnected socket?
> > Yes, good point and good suggestion. Will do it in v2.
>
> Hi Michael:
> This reminds me another irrelevant issue.
> In OVS, currently for each vhost port, we create an unix domain socket,
> and QEMU vhost proxy connects to this socket, and we use this to
> identify the connection. This works fine but is our workaround,
> otherwise we have no way to identify the connection.
> Do you think if this is an issue?
I'm sorry, I have trouble understanding what you wrote above.
What is the issue you are trying to work around?
> Do we have plan to support identification in VHOST_USER_MESSAGE? With
> the identification, if vhost as server, we only need to create one
> socket which receives multiple connections, and use the ID in the
> message to identify the connection.
>
> /huawei
Sending e.g. -name string from qemu over vhost might be useful
for debugging, but I'm not sure it's a good idea to
rely on it being unique.
>
> >
> > Thanks.
> >
> > --yliu
> >
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 7:54 ` Michael S. Tsirkin
@ 2016-05-10 8:07 ` Xie, Huawei
2016-05-10 8:42 ` Michael S. Tsirkin
0 siblings, 1 reply; 50+ messages in thread
From: Xie, Huawei @ 2016-05-10 8:07 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: Yuanhan Liu, Loftus, Ciara, dev
On 5/10/2016 3:56 PM, Michael S. Tsirkin wrote:
> On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
>> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
>>> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
>>>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
>>>>> +static void *
>>>>> +vhost_user_client_reconnect(void *arg)
>>>>> +{
>>>>> + struct reconnect_info *reconn = arg;
>>>>> + int ret;
>>>>> +
>>>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
>>>>> + while (1) {
>>>>> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
>>>>> + sizeof(reconn->un));
>>>>> + if (ret == 0)
>>>>> + break;
>>>>> + sleep(1);
>>>>> + }
>>>>> +
>>>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
>>>>> + free(reconn);
>>>>> +
>>>>> + return NULL;
>>>>> +}
>>>>> +
>>>> We could create hundreds of vhost-user ports in OVS. Without connections
>>>> with QEMU established, those ports are just inactive. This works fine in
>>>> server mode.
>>>> With client modes, do we need to create hundreds of vhost threads? This
>>>> would be too interruptible.
>>>> How about we create only one thread and do the reconnections for all the
>>>> unconnected socket?
>>> Yes, good point and good suggestion. Will do it in v2.
>> Hi Michael:
>> This reminds me another irrelevant issue.
>> In OVS, currently for each vhost port, we create an unix domain socket,
>> and QEMU vhost proxy connects to this socket, and we use this to
>> identify the connection. This works fine but is our workaround,
>> otherwise we have no way to identify the connection.
>> Do you think if this is an issue?
Let us say vhost creates one unix domain socket, with path as "sockpath",
and two virtio ports in two VMs both connect to the same socket with the
following command line
-chardev socket,id=char0,path=sockpath
How could vhost identify the connection?
Workarounds:
vhost creates two unix domain sockets, with paths "sockpath1" and
"sockpath2" respectively,
and the virtio ports in the two VMs connect to "sockpath1" and
"sockpath2" respectively.
If we have some name string from QEMU over vhost, as you mentioned, we
could create only one socket with path "sockpath".
Users ensure that the names are unique, just as they do with multiple
sockets.
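For illustration, that workaround maps to something like the following on the
DPDK side; the socket paths are just examples, and each VM's QEMU points its
-chardev at the matching path:

#include <rte_virtio_net.h>

/*
 * One unix domain socket per vhost-user port, so the socket path itself
 * identifies the connection:
 *   VM1: -chardev socket,id=char0,path=/tmp/sockpath1
 *   VM2: -chardev socket,id=char0,path=/tmp/sockpath2
 */
static int
register_vhost_ports(void)
{
	if (rte_vhost_driver_register("/tmp/sockpath1", 0) != 0)	/* VM1 */
		return -1;
	return rte_vhost_driver_register("/tmp/sockpath2", 0);		/* VM2 */
}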
> I'm sorry, I have trouble understanding what you wrote above.
> What is the issue you are trying to work around?
>
>> Do we have plan to support identification in VHOST_USER_MESSAGE? With
>> the identification, if vhost as server, we only need to create one
>> socket which receives multiple connections, and use the ID in the
>> message to identify the connection.
>>
>> /huawei
> Sending e.g. -name string from qemu over vhost might be useful
> for debugging, but I'm not sure it's a good idea to
> rely on it being unique.
>
>>> Thanks.
>>>
>>> --yliu
>>>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
` (2 preceding siblings ...)
2016-05-09 16:25 ` Xie, Huawei
@ 2016-05-10 8:21 ` Xie, Huawei
3 siblings, 0 replies; 50+ messages in thread
From: Xie, Huawei @ 2016-05-10 8:21 UTC (permalink / raw)
To: Yuanhan Liu, dev; +Cc: Michael S. Tsirkin
On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> When DPDK app crashes (or quits, or gets killed), and when QEMU supports
> reconnecting (patches have been sent, not merged yet), a restart of DPDK
> app would get stale vring base from QEMU. That would break the kernel
> virtio net completely, making it non-work any more, unless a driver reset
> is done.
s/DPDK app/DPDK vhost/
DPDK app is confusing
>
> So, instead of getting the stale vring base from QEMU, Huawei suggested
> we could get a proper one from used->idx.
>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Suggested-by: "Xie, Huawei" <huawei.xie@intel.com>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: Huawei Xie <huawei.xie@intel.com>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 8:07 ` Xie, Huawei
@ 2016-05-10 8:42 ` Michael S. Tsirkin
2016-05-10 9:00 ` Xie, Huawei
0 siblings, 1 reply; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-05-10 8:42 UTC (permalink / raw)
To: Xie, Huawei; +Cc: Yuanhan Liu, Loftus, Ciara, dev
On Tue, May 10, 2016 at 08:07:00AM +0000, Xie, Huawei wrote:
> On 5/10/2016 3:56 PM, Michael S. Tsirkin wrote:
> > On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
> >> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
> >>> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
> >>>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> >>>>> +static void *
> >>>>> +vhost_user_client_reconnect(void *arg)
> >>>>> +{
> >>>>> + struct reconnect_info *reconn = arg;
> >>>>> + int ret;
> >>>>> +
> >>>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> >>>>> + while (1) {
> >>>>> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
> >>>>> + sizeof(reconn->un));
> >>>>> + if (ret == 0)
> >>>>> + break;
> >>>>> + sleep(1);
> >>>>> + }
> >>>>> +
> >>>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> >>>>> + free(reconn);
> >>>>> +
> >>>>> + return NULL;
> >>>>> +}
> >>>>> +
> >>>> We could create hundreds of vhost-user ports in OVS. Without connections
> >>>> with QEMU established, those ports are just inactive. This works fine in
> >>>> server mode.
> >>>> With client modes, do we need to create hundreds of vhost threads? This
> >>>> would be too interruptible.
> >>>> How about we create only one thread and do the reconnections for all the
> >>>> unconnected socket?
> >>> Yes, good point and good suggestion. Will do it in v2.
> >> Hi Michael:
> >> This reminds me another irrelevant issue.
> >> In OVS, currently for each vhost port, we create an unix domain socket,
> >> and QEMU vhost proxy connects to this socket, and we use this to
> >> identify the connection. This works fine but is our workaround,
> >> otherwise we have no way to identify the connection.
> >> Do you think if this is an issue?
>
> Let us say vhost creates one unix domain socket, with path as "sockpath",
> and two virtio ports in two VMS both connect to the same socket with the
> following command line
> -chardev socket,id=char0,path=sockpath
> How could vhost identify the connection?
getpeername(2)?
>
> Workarounds:
> vhost creates two unix domain sockets, with path as "sockpath1" and
> "sockpath2" respectively,
> and the virtio ports in two VMS respectively connect to "sockpath1" and
> "sockpath2".
>
> If we have some name string from QEMU over vhost, as you mentioned, we
> could create only one socket with path "sockpath".
> User ensure that the names are unique, just as how they do with multiple
> sockets.
>
Seems rather fragile.
> > I'm sorry, I have trouble understanding what you wrote above.
> > What is the issue you are trying to work around?
> >
> >> Do we have plan to support identification in VHOST_USER_MESSAGE? With
> >> the identification, if vhost as server, we only need to create one
> >> socket which receives multiple connections, and use the ID in the
> >> message to identify the connection.
> >>
> >> /huawei
> > Sending e.g. -name string from qemu over vhost might be useful
> > for debugging, but I'm not sure it's a good idea to
> > rely on it being unique.
> >
> >>> Thanks.
> >>>
> >>> --yliu
> >>>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 8:42 ` Michael S. Tsirkin
@ 2016-05-10 9:00 ` Xie, Huawei
2016-05-10 9:17 ` Michael S. Tsirkin
0 siblings, 1 reply; 50+ messages in thread
From: Xie, Huawei @ 2016-05-10 9:00 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: Yuanhan Liu, Loftus, Ciara, dev
On 5/10/2016 4:42 PM, Michael S. Tsirkin wrote:
> On Tue, May 10, 2016 at 08:07:00AM +0000, Xie, Huawei wrote:
>> On 5/10/2016 3:56 PM, Michael S. Tsirkin wrote:
>>> On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
>>>> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
>>>>> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
>>>>>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
>>>>>>> +static void *
>>>>>>> +vhost_user_client_reconnect(void *arg)
>>>>>>> +{
>>>>>>> + struct reconnect_info *reconn = arg;
>>>>>>> + int ret;
>>>>>>> +
>>>>>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
>>>>>>> + while (1) {
>>>>>>> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
>>>>>>> + sizeof(reconn->un));
>>>>>>> + if (ret == 0)
>>>>>>> + break;
>>>>>>> + sleep(1);
>>>>>>> + }
>>>>>>> +
>>>>>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
>>>>>>> + free(reconn);
>>>>>>> +
>>>>>>> + return NULL;
>>>>>>> +}
>>>>>>> +
>>>>>> We could create hundreds of vhost-user ports in OVS. Wihout connections
>>>>>> with QEMU established, those ports are just inactive. This works fine in
>>>>>> server mode.
>>>>>> With client modes, do we need to create hundreds of vhost threads? This
>>>>>> would be too interruptible.
>>>>>> How about we create only one thread and do the reconnections for all the
>>>>>> unconnected socket?
>>>>> Yes, good point and good suggestion. Will do it in v2.
>>>> Hi Michael:
>>>> This reminds me another irrelevant issue.
>>>> In OVS, currently for each vhost port, we create an unix domain socket,
>>>> and QEMU vhost proxy connects to this socket, and we use this to
>>>> identify the connection. This works fine but is our workaround,
>>>> otherwise we have no way to identify the connection.
>>>> Do you think if this is an issue?
>> Let us say vhost creates one unix domain socket, with path as "sockpath",
>> and two virtio ports in two VMS both connect to the same socket with the
>> following command line
>> -chardev socket,id=char0,path=sockpath
>> How could vhost identify the connection?
> getpeername(2)?
getpeername returns host/port? Then it isn't useful.
The typical scenario in my mind is:
we create an OVS port with the name "port1", and when we receive a
virtio connection with ID "port1", we attach this virtio interface to
the OVS port "port1".
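Just to make the point concrete, a quick sketch (not from the patch): on
an AF_UNIX socket whose peer never called bind() (and the QEMU chardev
client normally does not), getpeername() returns an unnamed address, so
there is nothing usable to match a VM on:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

/* Sketch only: print what getpeername() reports for an accepted
 * vhost-user connection fd.  For an unbound unix-socket peer the
 * returned sun_path is empty, so it cannot tell two VMs apart. */
static void
dump_peer(int conn_fd)
{
	struct sockaddr_un un;
	socklen_t len = sizeof(un);

	memset(&un, 0, sizeof(un));
	if (getpeername(conn_fd, (struct sockaddr *)&un, &len) == 0)
		printf("peer: family=%d sun_path=\"%s\"\n",
		       un.sun_family, un.sun_path);
}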
>
>
>> Workarounds:
>> vhost creates two unix domain sockets, with path as "sockpath1" and
>> "sockpath2" respectively,
>> and the virtio ports in two VMS respectively connect to "sockpath1" and
>> "sockpath2".
>>
>> If we have some name string from QEMU over vhost, as you mentioned, we
>> could create only one socket with path "sockpath".
>> User ensure that the names are unique, just as how they do with multiple
>> sockets.
>>
> Seems rather fragile.
From the scenario above, it is enough. That is also how it works today
in the DPDK OVS implementation with multiple sockets.
Any other idea?
>
>>> I'm sorry, I have trouble understanding what you wrote above.
>>> What is the issue you are trying to work around?
>>>
>>>> Do we have plan to support identification in VHOST_USER_MESSAGE? With
>>>> the identification, if vhost as server, we only need to create one
>>>> socket which receives multiple connections, and use the ID in the
>>>> message to identify the connection.
>>>>
>>>> /huawei
>>> Sending e.g. -name string from qemu over vhost might be useful
>>> for debugging, but I'm not sure it's a good idea to
>>> rely on it being unique.
>>>
>>>>> Thanks.
>>>>>
>>>>> --yliu
>>>>>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 9:00 ` Xie, Huawei
@ 2016-05-10 9:17 ` Michael S. Tsirkin
2016-05-10 17:17 ` Loftus, Ciara
0 siblings, 1 reply; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-05-10 9:17 UTC (permalink / raw)
To: Xie, Huawei; +Cc: Yuanhan Liu, Loftus, Ciara, dev
On Tue, May 10, 2016 at 09:00:45AM +0000, Xie, Huawei wrote:
> On 5/10/2016 4:42 PM, Michael S. Tsirkin wrote:
> > On Tue, May 10, 2016 at 08:07:00AM +0000, Xie, Huawei wrote:
> >> On 5/10/2016 3:56 PM, Michael S. Tsirkin wrote:
> >>> On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
> >>>> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
> >>>>> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
> >>>>>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> >>>>>>> +static void *
> >>>>>>> +vhost_user_client_reconnect(void *arg)
> >>>>>>> +{
> >>>>>>> + struct reconnect_info *reconn = arg;
> >>>>>>> + int ret;
> >>>>>>> +
> >>>>>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> >>>>>>> + while (1) {
> >>>>>>> + ret = connect(reconn->fd, (struct sockaddr *)&reconn->un,
> >>>>>>> + sizeof(reconn->un));
> >>>>>>> + if (ret == 0)
> >>>>>>> + break;
> >>>>>>> + sleep(1);
> >>>>>>> + }
> >>>>>>> +
> >>>>>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> >>>>>>> + free(reconn);
> >>>>>>> +
> >>>>>>> + return NULL;
> >>>>>>> +}
> >>>>>>> +
> >>>>>> We could create hundreds of vhost-user ports in OVS. Wihout connections
> >>>>>> with QEMU established, those ports are just inactive. This works fine in
> >>>>>> server mode.
> >>>>>> With client modes, do we need to create hundreds of vhost threads? This
> >>>>>> would be too interruptible.
> >>>>>> How about we create only one thread and do the reconnections for all the
> >>>>>> unconnected socket?
> >>>>> Yes, good point and good suggestion. Will do it in v2.
> >>>> Hi Michael:
> >>>> This reminds me another irrelevant issue.
> >>>> In OVS, currently for each vhost port, we create an unix domain socket,
> >>>> and QEMU vhost proxy connects to this socket, and we use this to
> >>>> identify the connection. This works fine but is our workaround,
> >>>> otherwise we have no way to identify the connection.
> >>>> Do you think if this is an issue?
> >> Let us say vhost creates one unix domain socket, with path as "sockpath",
> >> and two virtio ports in two VMS both connect to the same socket with the
> >> following command line
> >> -chardev socket,id=char0,path=sockpath
> >> How could vhost identify the connection?
> > getpeername(2)?
>
> getpeer name returns host/port? then it isn't useful.
Maybe but I'm still in the dark. Useful for what?
> The typical scenario in my mind is:
> We create a OVS port with the name "port1", and when we receive an
> virtio connection with ID "port1", we attach this virtio interface to
> the OVS port "port1".
If you are going to listen on a socket, you can create ports
and attach socket fds to it dynamically as long as you get connections.
What is wrong with that?
>
> >
> >
> >> Workarounds:
> >> vhost creates two unix domain sockets, with path as "sockpath1" and
> >> "sockpath2" respectively,
> >> and the virtio ports in two VMS respectively connect to "sockpath1" and
> >> "sockpath2".
> >>
> >> If we have some name string from QEMU over vhost, as you mentioned, we
> >> could create only one socket with path "sockpath".
> >> User ensure that the names are unique, just as how they do with multiple
> >> sockets.
> >>
> > Seems rather fragile.
>
> >From the scenario above, it is enough. That is also how it works today
> in DPDK OVS implementation with multiple sockets.
> Any other idea?
>
> >
> >>> I'm sorry, I have trouble understanding what you wrote above.
> >>> What is the issue you are trying to work around?
> >>>
> >>>> Do we have plan to support identification in VHOST_USER_MESSAGE? With
> >>>> the identification, if vhost as server, we only need to create one
> >>>> socket which receives multiple connections, and use the ID in the
> >>>> message to identify the connection.
> >>>>
> >>>> /huawei
> >>> Sending e.g. -name string from qemu over vhost might be useful
> >>> for debugging, but I'm not sure it's a good idea to
> >>> rely on it being unique.
> >>>
> >>>>> Thanks.
> >>>>>
> >>>>> --yliu
> >>>>>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 9:17 ` Michael S. Tsirkin
@ 2016-05-10 17:17 ` Loftus, Ciara
2016-05-11 21:46 ` Michael S. Tsirkin
0 siblings, 1 reply; 50+ messages in thread
From: Loftus, Ciara @ 2016-05-10 17:17 UTC (permalink / raw)
To: Michael S. Tsirkin, Xie, Huawei; +Cc: Yuanhan Liu, dev
> On Tue, May 10, 2016 at 09:00:45AM +0000, Xie, Huawei wrote:
> > On 5/10/2016 4:42 PM, Michael S. Tsirkin wrote:
> > > On Tue, May 10, 2016 at 08:07:00AM +0000, Xie, Huawei wrote:
> > >> On 5/10/2016 3:56 PM, Michael S. Tsirkin wrote:
> > >>> On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
> > >>>> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
> > >>>>> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
> > >>>>>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> > >>>>>>> +static void *
> > >>>>>>> +vhost_user_client_reconnect(void *arg)
> > >>>>>>> +{
> > >>>>>>> + struct reconnect_info *reconn = arg;
> > >>>>>>> + int ret;
> > >>>>>>> +
> > >>>>>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> > >>>>>>> + while (1) {
> > >>>>>>> + ret = connect(reconn->fd, (struct sockaddr
> *)&reconn->un,
> > >>>>>>> + sizeof(reconn->un));
> > >>>>>>> + if (ret == 0)
> > >>>>>>> + break;
> > >>>>>>> + sleep(1);
> > >>>>>>> + }
> > >>>>>>> +
> > >>>>>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> > >>>>>>> + free(reconn);
> > >>>>>>> +
> > >>>>>>> + return NULL;
> > >>>>>>> +}
> > >>>>>>> +
> > >>>>>> We could create hundreds of vhost-user ports in OVS. Wihout
> connections
> > >>>>>> with QEMU established, those ports are just inactive. This works
> fine in
> > >>>>>> server mode.
> > >>>>>> With client modes, do we need to create hundreds of vhost
> threads? This
> > >>>>>> would be too interruptible.
> > >>>>>> How about we create only one thread and do the reconnections
> for all the
> > >>>>>> unconnected socket?
> > >>>>> Yes, good point and good suggestion. Will do it in v2.
> > >>>> Hi Michael:
> > >>>> This reminds me another irrelevant issue.
> > >>>> In OVS, currently for each vhost port, we create an unix domain
> socket,
> > >>>> and QEMU vhost proxy connects to this socket, and we use this to
> > >>>> identify the connection. This works fine but is our workaround,
> > >>>> otherwise we have no way to identify the connection.
> > >>>> Do you think if this is an issue?
> > >> Let us say vhost creates one unix domain socket, with path as
> "sockpath",
> > >> and two virtio ports in two VMS both connect to the same socket with
> the
> > >> following command line
> > >> -chardev socket,id=char0,path=sockpath
> > >> How could vhost identify the connection?
> > > getpeername(2)?
> >
> > getpeer name returns host/port? then it isn't useful.
>
> Maybe but I'm still in the dark. Useful for what?
>
> > The typical scenario in my mind is:
> > We create a OVS port with the name "port1", and when we receive an
> > virtio connection with ID "port1", we attach this virtio interface to
> > the OVS port "port1".
>
> If you are going to listen on a socket, you can create ports
> and attach socket fds to it dynamically as long as you get connections.
> What is wrong with that?
Hi Michael,
I haven't reviewed the patchset fully, but to hopefully provide more clarity on how OVS uses vHost:
OVS with DPDK needs some way to distinguish vHost connections from one another so it can switch traffic to the correct port depending on how the switch is programmed.
At the moment this is achieved by:
1. user provides unique port name eg. 'vhost0' (this is normal behaviour in OVS - checks are put in place to avoid overlapping port names)
2. DPDK vHost lib creates socket called 'vhost0'
3. VM launched with vhost0 socket // -chardev socket,id=char0,path=/path/to/vhost0
4. OVS receives 'new_device' vhost callback, checks the name of the device (virtio_dev->ifname == vhost0?), if the name matches the name provided in step 1, OVS stores the virtio_net *dev pointer
5. OVS uses *dev pointer to send and receive traffic to vhost0 via calls to vhost library functions eg. enqueue(*dev) / dequeue(*dev)
6. Repeat for multiple vhost devices
We need to make sure that there is still some way to distinguish devices from one another like in step 4. Let me know if you need any further clarification.
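For illustration, the step 4 matching can be sketched roughly as below;
the port table and its size are made up for the example, and only
dev->ifname (filled by the vhost library with the socket path from
step 2) comes from the real API:

#include <limits.h>
#include <string.h>
#include <rte_virtio_net.h>

#define MAX_PORTS 64

/* Hypothetical per-port bookkeeping, standing in for the OVS netdev. */
struct vhost_port {
	char name[PATH_MAX];		/* e.g. "/path/to/vhost0" */
	struct virtio_net *dev;		/* NULL until a VM connects */
};

static struct vhost_port ports[MAX_PORTS];

/* 'new_device' callback: find the port whose socket path matches the
 * device's ifname and remember the device for later enqueue/dequeue. */
static int
new_device(struct virtio_net *dev)
{
	int i;

	for (i = 0; i < MAX_PORTS; i++) {
		if (strcmp(ports[i].name, dev->ifname) == 0) {
			ports[i].dev = dev;
			return 0;
		}
	}
	return -1;	/* no port was created with this socket path */
}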
Thanks,
Ciara
>
>
> >
> > >
> > >
> > >> Workarounds:
> > >> vhost creates two unix domain sockets, with path as "sockpath1" and
> > >> "sockpath2" respectively,
> > >> and the virtio ports in two VMS respectively connect to "sockpath1" and
> > >> "sockpath2".
> > >>
> > >> If we have some name string from QEMU over vhost, as you
> mentioned, we
> > >> could create only one socket with path "sockpath".
> > >> User ensure that the names are unique, just as how they do with
> multiple
> > >> sockets.
> > >>
> > > Seems rather fragile.
> >
> > >From the scenario above, it is enough. That is also how it works today
> > in DPDK OVS implementation with multiple sockets.
> > Any other idea?
> >
> > >
> > >>> I'm sorry, I have trouble understanding what you wrote above.
> > >>> What is the issue you are trying to work around?
> > >>>
> > >>>> Do we have plan to support identification in
> VHOST_USER_MESSAGE? With
> > >>>> the identification, if vhost as server, we only need to create one
> > >>>> socket which receives multiple connections, and use the ID in the
> > >>>> message to identify the connection.
> > >>>>
> > >>>> /huawei
> > >>> Sending e.g. -name string from qemu over vhost might be useful
> > >>> for debugging, but I'm not sure it's a good idea to
> > >>> rely on it being unique.
> > >>>
> > >>>>> Thanks.
> > >>>>>
> > >>>>> --yliu
> > >>>>>
> >
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-10 3:23 ` [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Xu, Qian Q
@ 2016-05-10 17:41 ` Yuanhan Liu
0 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-10 17:41 UTC (permalink / raw)
To: Xu, Qian Q; +Cc: dev, Xie, Huawei, marcandre.lureau
On Tue, May 10, 2016 at 03:23:15AM +0000, Xu, Qian Q wrote:
> Do we need patch qemu for the reconnect case?
Yes, we need some support from QEMU: currently QEMU will not be able
to detect disconnection and hence will not establish the connection
when DPDK vhost restarts.
The following patchset from Marc resolves the above issue.
http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
And note that unlike the vhost-user multiple queue support, which depends
on a new vhost-user message from QEMU, this patchset does not depend
on QEMU.
--yliu
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
> Sent: Saturday, May 07, 2016 2:40 PM
> To: dev@dpdk.org
> Cc: Xie, Huawei; Yuanhan Liu
> Subject: [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability
>
> Both the vhost-user backend (DPDK here) and frontend (QEMU) could be server, as well as client. DPDK just acts as server so far. This patch set would make it possible to act as both.
>
> A new arg (flags) is introduced for API rte_vhost_driver_register(). And the client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this implies an API breakage. However, since this release deals with ABI/API refactoring, it should not be an issue.
>
> With the DPDK as client, it's easier to implement the "reconnect" ability, which means we could still make vhost-user work after DPDK restarts.
>
>
> ---
> Yuanhan Liu (6):
> vhost: rename structs for enabling client mode
> vhost: add vhost-user client mode
> vhost: add reconnect ability
> vhost: workaround stale vring base
> examples/vhost: add client and reconnect option
> vhost: add pmd client and reconnect option
>
> drivers/net/vhost/rte_eth_vhost.c | 54 +++-
> examples/vhost/main.c | 23 +-
> lib/librte_vhost/rte_virtio_net.h | 12 +-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 355 ++++++++++++++++++---------
> lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
> lib/librte_vhost/virtio-net.c | 8 +
> 6 files changed, 313 insertions(+), 145 deletions(-)
>
> --
> 1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability
2016-05-10 17:17 ` Loftus, Ciara
@ 2016-05-11 21:46 ` Michael S. Tsirkin
0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-05-11 21:46 UTC (permalink / raw)
To: Loftus, Ciara; +Cc: Xie, Huawei, Yuanhan Liu, dev
On Tue, May 10, 2016 at 05:17:50PM +0000, Loftus, Ciara wrote:
> > On Tue, May 10, 2016 at 09:00:45AM +0000, Xie, Huawei wrote:
> > > On 5/10/2016 4:42 PM, Michael S. Tsirkin wrote:
> > > > On Tue, May 10, 2016 at 08:07:00AM +0000, Xie, Huawei wrote:
> > > >> On 5/10/2016 3:56 PM, Michael S. Tsirkin wrote:
> > > >>> On Tue, May 10, 2016 at 07:24:10AM +0000, Xie, Huawei wrote:
> > > >>>> On 5/10/2016 2:08 AM, Yuanhan Liu wrote:
> > > >>>>> On Mon, May 09, 2016 at 04:47:02PM +0000, Xie, Huawei wrote:
> > > >>>>>> On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> > > >>>>>>> +static void *
> > > >>>>>>> +vhost_user_client_reconnect(void *arg)
> > > >>>>>>> +{
> > > >>>>>>> + struct reconnect_info *reconn = arg;
> > > >>>>>>> + int ret;
> > > >>>>>>> +
> > > >>>>>>> + RTE_LOG(ERR, VHOST_CONFIG, "reconnecting...\n");
> > > >>>>>>> + while (1) {
> > > >>>>>>> + ret = connect(reconn->fd, (struct sockaddr
> > *)&reconn->un,
> > > >>>>>>> + sizeof(reconn->un));
> > > >>>>>>> + if (ret == 0)
> > > >>>>>>> + break;
> > > >>>>>>> + sleep(1);
> > > >>>>>>> + }
> > > >>>>>>> +
> > > >>>>>>> + vhost_user_add_connection(reconn->fd, reconn->vsocket);
> > > >>>>>>> + free(reconn);
> > > >>>>>>> +
> > > >>>>>>> + return NULL;
> > > >>>>>>> +}
> > > >>>>>>> +
> > > >>>>>> We could create hundreds of vhost-user ports in OVS. Wihout
> > connections
> > > >>>>>> with QEMU established, those ports are just inactive. This works
> > fine in
> > > >>>>>> server mode.
> > > >>>>>> With client modes, do we need to create hundreds of vhost
> > threads? This
> > > >>>>>> would be too interruptible.
> > > >>>>>> How about we create only one thread and do the reconnections
> > for all the
> > > >>>>>> unconnected socket?
> > > >>>>> Yes, good point and good suggestion. Will do it in v2.
> > > >>>> Hi Michael:
> > > >>>> This reminds me another irrelevant issue.
> > > >>>> In OVS, currently for each vhost port, we create an unix domain
> > socket,
> > > >>>> and QEMU vhost proxy connects to this socket, and we use this to
> > > >>>> identify the connection. This works fine but is our workaround,
> > > >>>> otherwise we have no way to identify the connection.
> > > >>>> Do you think if this is an issue?
> > > >> Let us say vhost creates one unix domain socket, with path as
> > "sockpath",
> > > >> and two virtio ports in two VMS both connect to the same socket with
> > the
> > > >> following command line
> > > >> -chardev socket,id=char0,path=sockpath
> > > >> How could vhost identify the connection?
> > > > getpeername(2)?
> > >
> > > getpeer name returns host/port? then it isn't useful.
> >
> > Maybe but I'm still in the dark. Useful for what?
> >
> > > The typical scenario in my mind is:
> > > We create a OVS port with the name "port1", and when we receive an
> > > virtio connection with ID "port1", we attach this virtio interface to
> > > the OVS port "port1".
> >
> > If you are going to listen on a socket, you can create ports
> > and attach socket fds to it dynamically as long as you get connections.
> > What is wrong with that?
>
> Hi Michael,
>
> I haven't reviewed the patchset fully, but to hopefully provide more clarify on how OVS uses vHost:
>
> OVS with DPDK needs some way to distinguish vHost connections from one another so it can switch traffic to the correct port depending on how the switch is programmed.
> At the moment this is achieved by:
> 1. user provides unique port name eg. 'vhost0' (this is normal behaviour in OVS - checks are put in place to avoid overlapping port names)
> 2. DPDK vHost lib creates socket called 'vhost0'
> 3. VM launched with vhost0 socket // -chardev socket,id=char0,path=/path/to/vhost0
> 4. OVS receives 'new_device' vhost callback, checks the name of the device (virtio_dev->ifname == vhost0?), if the name matches the name provided in step 1, OVS stores the virtio_net *dev pointer
> 5. OVS uses *dev pointer to send and receive traffic to vhost0 via calls to vhost library functions eg. enqueue(*dev) / dequeue(*dev)
> 6. Repeat for multiple vhost devices
>
> We need to make sure that there is still some way to distinguish devices from one another like in step 4. Let me know if you need any further clarification.
>
> Thanks,
> Ciara
I see. I think that the path approach is better personally -
it is easier to secure, just give each QEMU access to the correct
path only.
> >
> >
> > >
> > > >
> > > >
> > > >> Workarounds:
> > > >> vhost creates two unix domain sockets, with path as "sockpath1" and
> > > >> "sockpath2" respectively,
> > > >> and the virtio ports in two VMS respectively connect to "sockpath1" and
> > > >> "sockpath2".
> > > >>
> > > >> If we have some name string from QEMU over vhost, as you
> > mentioned, we
> > > >> could create only one socket with path "sockpath".
> > > >> User ensure that the names are unique, just as how they do with
> > multiple
> > > >> sockets.
> > > >>
> > > > Seems rather fragile.
> > >
> > > >From the scenario above, it is enough. That is also how it works today
> > > in DPDK OVS implementation with multiple sockets.
> > > Any other idea?
> > >
> > > >
> > > >>> I'm sorry, I have trouble understanding what you wrote above.
> > > >>> What is the issue you are trying to work around?
> > > >>>
> > > >>>> Do we have plan to support identification in
> > VHOST_USER_MESSAGE? With
> > > >>>> the identification, if vhost as server, we only need to create one
> > > >>>> socket which receives multiple connections, and use the ID in the
> > > >>>> message to identify the connection.
> > > >>>>
> > > >>>> /huawei
> > > >>> Sending e.g. -name string from qemu over vhost might be useful
> > > >>> for debugging, but I'm not sure it's a good idea to
> > > >>> rely on it being unique.
> > > >>>
> > > >>>>> Thanks.
> > > >>>>>
> > > >>>>> --yliu
> > > >>>>>
> > >
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (6 preceding siblings ...)
2016-05-10 3:23 ` [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Xu, Qian Q
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
` (6 more replies)
7 siblings, 7 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
v2: - added release doc
- do not remove socket file for the client mode
- create one thread only to handle all reconnects
NOTE: I created a branch at dpdk.org [0] for more convenient testing:
[0]: git://dpdk.org/next/dpdk-next-virtio for-testing
When the DPDK vhost-user application (such as OVS) restarts (due to a
crash, or an update), the vhost-user connection between DPDK and QEMU
won't be established again automatically. In other words, the virtio
net is broken.
The reason it doesn't work is that DPDK acts as the server only so far.
A restart of the server needs a reconnection from the client (QEMU).
However, reconnecting is not supported by QEMU.
Adding support for client mode and letting DPDK be the client resolves
the above issue more easily: with DPDK vhost-user as the client, a
restart of DPDK naturally tries to connect to the server (QEMU)
automatically.
Therefore, this patchset implements the DPDK vhost-user client mode, by
introducing a new arg (flags) for API rte_vhost_driver_register(). And the
client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this
implies an API breakage. However, since this release deals with ABI/API
refactoring, it should not be an issue.
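For example, with this series an application opts in purely via the new
flags argument. A minimal sketch (the socket path is just an example,
and RTE_VHOST_USER_RECONNECT comes from patch 3):

#include <stdint.h>
#include <rte_virtio_net.h>

static int
register_vhost_client(void)
{
	uint64_t flags = RTE_VHOST_USER_CLIENT | RTE_VHOST_USER_RECONNECT;

	/*
	 * QEMU has to be started with a server-mode chardev on the same
	 * path, e.g.: -chardev socket,id=char0,path=/tmp/vhost0,server
	 */
	return rte_vhost_driver_register("/tmp/vhost0", flags);
}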
Another thing needed to make it work is that you not only have to
consider the case where the DPDK vhost-user app restarts, but also the
case where QEMU restarts: the guest OS sometimes just reboots. In such
a case, when the server is down, the client has to keep reconnecting to
the server until the server is back and the connection is established
again. And that's what the "reconnect" patch is for.
Note that current QEMU does not support a second connection from the
client, thus a restart of DPDK vhost-user will not work yet. This is
because current QEMU won't be able to detect the disconnect caused by
the restart, thus it will not listen for later connections. Patches [1]
have been sent; they're just not merged yet. But unlike the vhost-user
multiple queue case, where we had a critical dependency on the QEMU
implementation, here we have no such dependency. Therefore, I think
it's okay to make DPDK ready for the "reconnect" stuff first. (Note
that I also mentioned this fact in the release doc.)
[1]: http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
Thanks.
--yliu
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client and reconnect option
vhost: add pmd client and reconnect option
doc/guides/rel_notes/release_16_07.rst | 23 ++
drivers/net/vhost/rte_eth_vhost.c | 54 +++-
examples/vhost/main.c | 23 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 403 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 9 +
8 files changed, 390 insertions(+), 148 deletions(-)
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 1/6] vhost: rename structs for enabling client mode
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 2/6] vhost: add vhost-user " Yuanhan Liu
` (5 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
DPDK vhost-user just acts as server so far, so using a struct named
"vhost_server" is okay. However, once we add client mode, it doesn't
make sense any more. Rename it to "vhost_user_socket" here.
There was nothing obviously wrong with "connfd_ctx", but I think it's
clearly better to rename it to "vhost_user_connection", as it does
represent a connection: a connection between the backend (DPDK) and
the frontend (QEMU).
Similarly, a few more renames are made, such as "vserver_new_vq_conn"
to "vhost_user_new_connection".
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
lib/librte_vhost/vhost_user/vhost-net-user.c | 131 ++++++++++++++-------------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 --
2 files changed, 70 insertions(+), 67 deletions(-)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 68fc9b9..f485a3b 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -51,32 +51,41 @@
#include "vhost-net.h"
#include "virtio-net-user.h"
-#define MAX_VIRTIO_BACKLOG 128
-
-static void vserver_new_vq_conn(int fd, void *data, int *remove);
-static void vserver_message_handler(int fd, void *dat, int *remove);
+/*
+ * Every time rte_vhost_driver_register() is invoked, an associated
+ * vhost_user_socket struct will be created.
+ */
+struct vhost_user_socket {
+ char *path;
+ int listenfd;
+};
-struct connfd_ctx {
- struct vhost_server *vserver;
+struct vhost_user_connection {
+ struct vhost_user_socket *vsocket;
int vid;
};
-#define MAX_VHOST_SERVER 1024
-struct _vhost_server {
- struct vhost_server *server[MAX_VHOST_SERVER];
+#define MAX_VHOST_SOCKET 1024
+struct vhost_user {
+ struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
struct fdset fdset;
- int vserver_cnt;
- pthread_mutex_t server_mutex;
+ int vsocket_cnt;
+ pthread_mutex_t mutex;
};
-static struct _vhost_server g_vhost_server = {
+#define MAX_VIRTIO_BACKLOG 128
+
+static void vhost_user_new_connection(int fd, void *data, int *remove);
+static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+
+static struct vhost_user vhost_user = {
.fdset = {
.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
- .vserver_cnt = 0,
- .server_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .vsocket_cnt = 0,
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
};
static const char *vhost_message_str[VHOST_USER_MAX] = {
@@ -278,13 +287,13 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
return ret;
}
-/* call back when there is new virtio connection. */
+/* call back when there is new vhost-user connection. */
static void
-vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
{
- struct vhost_server *vserver = (struct vhost_server *)dat;
+ struct vhost_user_socket *vsocket = dat;
int conn_fd;
- struct connfd_ctx *ctx;
+ struct vhost_user_connection *conn;
int vid;
unsigned int size;
@@ -294,41 +303,41 @@ vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
if (conn_fd < 0)
return;
- ctx = calloc(1, sizeof(*ctx));
- if (ctx == NULL) {
+ conn = calloc(1, sizeof(*conn));
+ if (conn == NULL) {
close(conn_fd);
return;
}
vid = vhost_new_device();
if (vid == -1) {
- free(ctx);
+ free(conn);
close(conn_fd);
return;
}
- size = strnlen(vserver->path, PATH_MAX);
- vhost_set_ifname(vid, vserver->path, size);
+ size = strnlen(vsocket->path, PATH_MAX);
+ vhost_set_ifname(vid, vsocket->path, size);
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
- ctx->vserver = vserver;
- ctx->vid = vid;
- fdset_add(&g_vhost_server.fdset,
- conn_fd, vserver_message_handler, NULL, ctx);
+ conn->vsocket = vsocket;
+ conn->vid = vid;
+ fdset_add(&vhost_user.fdset,
+ conn_fd, vhost_user_msg_handler, NULL, conn);
}
/* callback when there is message on the connfd */
static void
-vserver_message_handler(int connfd, void *dat, int *remove)
+vhost_user_msg_handler(int connfd, void *dat, int *remove)
{
int vid;
- struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+ struct vhost_user_connection *conn = dat;
struct VhostUserMsg msg;
uint64_t features;
int ret;
- vid = cfd_ctx->vid;
+ vid = conn->vid;
ret = read_vhost_message(connfd, &msg);
if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
if (ret < 0)
@@ -343,7 +352,7 @@ vserver_message_handler(int connfd, void *dat, int *remove)
close(connfd);
*remove = 1;
- free(cfd_ctx);
+ free(conn);
vhost_destroy_device(vid);
return;
@@ -449,37 +458,37 @@ vserver_message_handler(int connfd, void *dat, int *remove)
int
rte_vhost_driver_register(const char *path)
{
- struct vhost_server *vserver;
+ struct vhost_user_socket *vsocket;
- pthread_mutex_lock(&g_vhost_server.server_mutex);
+ pthread_mutex_lock(&vhost_user.mutex);
- if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
RTE_LOG(ERR, VHOST_CONFIG,
"error: the number of servers reaches maximum\n");
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver = calloc(sizeof(struct vhost_server), 1);
- if (vserver == NULL) {
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vsocket = calloc(sizeof(struct vhost_user_socket), 1);
+ if (vsocket == NULL) {
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver->listenfd = uds_socket(path);
- if (vserver->listenfd < 0) {
- free(vserver);
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vsocket->listenfd = uds_socket(path);
+ if (vsocket->listenfd < 0) {
+ free(vsocket);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver->path = strdup(path);
+ vsocket->path = strdup(path);
- fdset_add(&g_vhost_server.fdset, vserver->listenfd,
- vserver_new_vq_conn, NULL, vserver);
+ fdset_add(&vhost_user.fdset, vsocket->listenfd,
+ vhost_user_new_connection, NULL, vsocket);
- g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+ pthread_mutex_unlock(&vhost_user.mutex);
return 0;
}
@@ -494,28 +503,28 @@ rte_vhost_driver_unregister(const char *path)
int i;
int count;
- pthread_mutex_lock(&g_vhost_server.server_mutex);
+ pthread_mutex_lock(&vhost_user.mutex);
- for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
- if (!strcmp(g_vhost_server.server[i]->path, path)) {
- fdset_del(&g_vhost_server.fdset,
- g_vhost_server.server[i]->listenfd);
+ for (i = 0; i < vhost_user.vsocket_cnt; i++) {
+ if (!strcmp(vhost_user.vsockets[i]->path, path)) {
+ fdset_del(&vhost_user.fdset,
+ vhost_user.vsockets[i]->listenfd);
- close(g_vhost_server.server[i]->listenfd);
- free(g_vhost_server.server[i]->path);
- free(g_vhost_server.server[i]);
+ close(vhost_user.vsockets[i]->listenfd);
+ free(vhost_user.vsockets[i]->path);
+ free(vhost_user.vsockets[i]);
unlink(path);
- count = --g_vhost_server.vserver_cnt;
- g_vhost_server.server[i] = g_vhost_server.server[count];
- g_vhost_server.server[count] = NULL;
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ count = --vhost_user.vsocket_cnt;
+ vhost_user.vsockets[i] = vhost_user.vsockets[count];
+ vhost_user.vsockets[count] = NULL;
+ pthread_mutex_unlock(&vhost_user.mutex);
return 0;
}
}
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
@@ -523,6 +532,6 @@ rte_vhost_driver_unregister(const char *path)
int
rte_vhost_driver_session_start(void)
{
- fdset_event_dispatch(&g_vhost_server.fdset);
+ fdset_event_dispatch(&vhost_user.fdset);
return 0;
}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
index ec68b96..f533239 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -38,12 +38,6 @@
#include <linux/vhost.h>
#include "rte_virtio_net.h"
-#include "fd_man.h"
-
-struct vhost_server {
- char *path; /**< The path the uds is bind to. */
- int listenfd; /**< The listener sockfd. */
-};
/* refer to hw/virtio/vhost-user.c */
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 2/6] vhost: add vhost-user client mode
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 3/6] vhost: add reconnect ability Yuanhan Liu
` (4 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Add a new parameter (flags) to rte_vhost_driver_register(). DPDK
vhost-user acts in client mode when the RTE_VHOST_USER_CLIENT flag
is set. The flags argument would also allow future extensions without
breaking the API (again).
The rest is straightforward then: allocate a unix socket, and
bind/listen for the server, connect for the client.
This extension is for vhost-user only, therefore we simply quit
and report an error when any flags are given for vhost-cuse.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: - add release doc
- do not remove socket file for client mode
---
doc/guides/rel_notes/release_16_07.rst | 14 ++
drivers/net/vhost/rte_eth_vhost.c | 2 +-
examples/vhost/main.c | 2 +-
lib/librte_vhost/rte_virtio_net.h | 11 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 227 ++++++++++++++++-----------
6 files changed, 170 insertions(+), 94 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index ebc507b..da2c6cf 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -34,6 +34,20 @@ This section should contain new features added in this release. Sample format:
Refer to the previous release notes for examples.
+* **Added vhost-user client mode.**
+
+ DPDK vhost-user can be the server as well as the client. It supported
+ server mode only before; now it also supports client mode. Client mode
+ is enabled when the RTE_VHOST_USER_CLIENT flag is set while calling
+ ``rte_vhost_driver_register``.
+
+ When DPDK vhost-user restarts from normal or abnormal quit (say crash),
+ the client mode would allow DPDK to establish the connection again. Note
+ that a brand new QEMU version is needed to make it work: current QEMU
+ simply cannot accept the connection from a DPDK vhost-user restart. At
+ the time of writing this, patches for QEMU are not merged yet, but it's
+ likely to get merged in v2.7.
+
Resolved Issues
---------------
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 56c1c36..91b5d95 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -467,7 +467,7 @@ eth_dev_start(struct rte_eth_dev *dev)
int ret = 0;
if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
- ret = rte_vhost_driver_register(internal->iface_name);
+ ret = rte_vhost_driver_register(internal->iface_name, 0);
if (ret)
return ret;
}
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index f3a6277..1391cd6 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1496,7 +1496,7 @@ main(int argc, char *argv[])
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
/* Register vhost(cuse or user) driver to handle vhost messages. */
- ret = rte_vhost_driver_register((char *)&dev_basename);
+ ret = rte_vhost_driver_register(dev_basename, 0);
if (ret != 0)
rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index bc2b74b..5f69e78 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -51,6 +51,8 @@
#include <rte_mempool.h>
#include <rte_ether.h>
+#define RTE_VHOST_USER_CLIENT (1ULL << 0)
+
/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
@@ -85,11 +87,14 @@ uint64_t rte_vhost_feature_get(void);
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
-/* Register vhost driver. dev_name could be different for multiple instance support. */
-int rte_vhost_driver_register(const char *dev_name);
+/**
+ * Register vhost driver. path could be different for multiple
+ * instance support.
+ */
+int rte_vhost_driver_register(const char *path, uint64_t flags);
/* Unregister vhost driver. This is only meaningful to vhost user. */
-int rte_vhost_driver_unregister(const char *dev_name);
+int rte_vhost_driver_unregister(const char *path);
/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index cf6d191..5d15011 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -352,7 +352,7 @@ static const struct cuse_lowlevel_ops vhost_net_ops = {
* vhost_net_device_ops are also passed when the device is registered in app.
*/
int
-rte_vhost_driver_register(const char *dev_name)
+rte_vhost_driver_register(const char *dev_name, uint64_t flags)
{
struct cuse_info cuse_info;
char device_name[PATH_MAX] = "";
@@ -364,6 +364,12 @@ rte_vhost_driver_register(const char *dev_name)
char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+ if (flags) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost-cuse does not support any flags so far\n");
+ return -1;
+ }
+
if (access(cuse_device_name, R_OK | W_OK) < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"char device %s can't be accessed, maybe not exist\n",
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f485a3b..a480f9f 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -33,6 +33,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
@@ -58,6 +59,7 @@
struct vhost_user_socket {
char *path;
int listenfd;
+ bool is_server;
};
struct vhost_user_connection {
@@ -75,7 +77,7 @@ struct vhost_user {
#define MAX_VIRTIO_BACKLOG 128
-static void vhost_user_new_connection(int fd, void *data, int *remove);
+static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
static struct vhost_user vhost_user = {
@@ -111,48 +113,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
};
-/**
- * Create a unix domain socket, bind to path and listen for connection.
- * @return
- * socket fd or -1 on failure
- */
-static int
-uds_socket(const char *path)
-{
- struct sockaddr_un un;
- int sockfd;
- int ret;
-
- if (path == NULL)
- return -1;
-
- sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sockfd < 0)
- return -1;
- RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
-
- memset(&un, 0, sizeof(un));
- un.sun_family = AF_UNIX;
- snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
- ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
- if (ret == -1) {
- RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
- sockfd, path);
- goto err;
- }
- RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
-
- ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
- if (ret == -1)
- goto err;
-
- return sockfd;
-
-err:
- close(sockfd);
- return -1;
-}
-
/* return bytes# of read on success or negative val on failure. */
static int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
@@ -287,32 +247,24 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
return ret;
}
-/* call back when there is new vhost-user connection. */
+
static void
-vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
+vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
- struct vhost_user_socket *vsocket = dat;
- int conn_fd;
- struct vhost_user_connection *conn;
int vid;
- unsigned int size;
-
- conn_fd = accept(fd, NULL, NULL);
- RTE_LOG(INFO, VHOST_CONFIG,
- "new virtio connection is %d\n", conn_fd);
- if (conn_fd < 0)
- return;
+ size_t size;
+ struct vhost_user_connection *conn;
- conn = calloc(1, sizeof(*conn));
+ conn = malloc(sizeof(*conn));
if (conn == NULL) {
- close(conn_fd);
+ close(fd);
return;
}
vid = vhost_new_device();
if (vid == -1) {
+ close(fd);
free(conn);
- close(conn_fd);
return;
}
@@ -323,8 +275,21 @@ vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
conn->vsocket = vsocket;
conn->vid = vid;
- fdset_add(&vhost_user.fdset,
- conn_fd, vhost_user_msg_handler, NULL, conn);
+ fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
+}
+
+/* call back when there is new vhost-user connection from client */
+static void
+vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
+{
+ struct vhost_user_socket *vsocket = dat;
+
+ fd = accept(fd, NULL, NULL);
+ if (fd < 0)
+ return;
+
+ RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
+ vhost_user_add_connection(fd, vsocket);
}
/* callback when there is message on the connfd */
@@ -452,50 +417,135 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
}
}
-/**
- * Creates and initialise the vhost server.
- */
-int
-rte_vhost_driver_register(const char *path)
+static int
+create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
{
- struct vhost_user_socket *vsocket;
+ int fd;
- pthread_mutex_lock(&vhost_user.mutex);
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -1;
+ RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
+ is_server ? "server" : "client", fd);
- if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "error: the number of servers reaches maximum\n");
- pthread_mutex_unlock(&vhost_user.mutex);
+ memset(un, 0, sizeof(*un));
+ un->sun_family = AF_UNIX;
+ strncpy(un->sun_path, path, sizeof(un->sun_path));
+
+ return fd;
+}
+
+static int
+vhost_user_create_server(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
return -1;
+
+ ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to bind to %s: %s; remove it and try again\n",
+ path, strerror(errno));
+ goto err;
}
+ RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
- vsocket = calloc(sizeof(struct vhost_user_socket), 1);
- if (vsocket == NULL) {
- pthread_mutex_unlock(&vhost_user.mutex);
+ ret = listen(fd, MAX_VIRTIO_BACKLOG);
+ if (ret < 0)
+ goto err;
+
+ vsocket->listenfd = fd;
+ fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
+ NULL, vsocket);
+
+ return 0;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static int
+vhost_user_create_client(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
+ return -1;
+
+ ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
+ path, strerror(errno));
+ close(fd);
return -1;
}
- vsocket->listenfd = uds_socket(path);
- if (vsocket->listenfd < 0) {
- free(vsocket);
- pthread_mutex_unlock(&vhost_user.mutex);
+ vhost_user_add_connection(fd, vsocket);
+
+ return 0;
+}
+
+/*
+ * Register a new vhost-user socket; here we could act as server
+ * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
+ * is set.
+ */
+int
+rte_vhost_driver_register(const char *path, uint64_t flags)
+{
+ int ret = -1;
+ struct vhost_user_socket *vsocket;
+
+ if (!path)
return -1;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: the number of vhost sockets reaches maximum\n");
+ goto out;
}
+ vsocket = malloc(sizeof(struct vhost_user_socket));
+ if (!vsocket)
+ goto out;
+ memset(vsocket, 0, sizeof(struct vhost_user_socket));
vsocket->path = strdup(path);
- fdset_add(&vhost_user.fdset, vsocket->listenfd,
- vhost_user_new_connection, NULL, vsocket);
+ if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ ret = vhost_user_create_client(vsocket);
+ } else {
+ vsocket->is_server = true;
+ ret = vhost_user_create_server(vsocket);
+ }
+ if (ret < 0) {
+ free(vsocket->path);
+ free(vsocket);
+ goto out;
+ }
vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+
+out:
pthread_mutex_unlock(&vhost_user.mutex);
- return 0;
+ return ret;
}
-
/**
- * Unregister the specified vhost server
+ * Unregister the specified vhost socket
*/
int
rte_vhost_driver_unregister(const char *path)
@@ -507,15 +557,16 @@ rte_vhost_driver_unregister(const char *path)
for (i = 0; i < vhost_user.vsocket_cnt; i++) {
if (!strcmp(vhost_user.vsockets[i]->path, path)) {
- fdset_del(&vhost_user.fdset,
- vhost_user.vsockets[i]->listenfd);
+ if (vhost_user.vsockets[i]->is_server) {
+ fdset_del(&vhost_user.fdset,
+ vhost_user.vsockets[i]->listenfd);
+ close(vhost_user.vsockets[i]->listenfd);
+ unlink(path);
+ }
- close(vhost_user.vsockets[i]->listenfd);
free(vhost_user.vsockets[i]->path);
free(vhost_user.vsockets[i]);
- unlink(path);
-
count = --vhost_user.vsocket_cnt;
vhost_user.vsockets[i] = vhost_user.vsockets[count];
vhost_user.vsockets[count] = NULL;
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 3/6] vhost: add reconnect ability
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 2/6] vhost: add vhost-user " Yuanhan Liu
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 4/6] vhost: workaround stale vring base Yuanhan Liu
` (3 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Allow reconnecting on failure when both RTE_VHOST_USER_RECONNECT and
RTE_VHOST_USER_CLIENT flags are set.
Reconnecting means two things here:
- when DPDK app starts first and QEMU (as the server) is not started,
without reconnecting, DPDK app would simply fail on vhost-user
registration.
- when QEMU reboots, without reconnecting, you can't re-establish the
connection without restarting DPDK app.
This patch makes it work for both cases above. It simply creates
a new thread and keeps trying to call "connect()" until it succeeds.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: - create one thread only to handle all reconnects
- update release note on reconnect
---
doc/guides/rel_notes/release_16_07.rst | 9 +++
lib/librte_vhost/rte_virtio_net.h | 1 +
lib/librte_vhost/vhost_user/vhost-net-user.c | 103 +++++++++++++++++++++++++--
3 files changed, 109 insertions(+), 4 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index da2c6cf..c711b48 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -48,6 +48,15 @@ This section should contain new features added in this release. Sample format:
the time of writing this, patches for QEMU are not merged yet, but it's
likely to get merged in v2.7.
+* **Added vhost-user reconnect ability.**
+
+ Reconnect will only work in the client mode and when RTE_VHOST_USER_RECONNECT
+ flag is set. It allows DPDK to keep trying to connect to the server
+ (QEMU) until it succeeds when
+
+ * the first connect fails (when QEMU is not started yet)
+ * connection is broken (when QEMU restarts)
+
Resolved Issues
---------------
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 5f69e78..57eb03d 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -52,6 +52,7 @@
#include <rte_ether.h>
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
+#define RTE_VHOST_USER_RECONNECT (1ULL << 1)
/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index a480f9f..6ede8a6 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -41,6 +41,7 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/queue.h>
#include <errno.h>
#include <pthread.h>
@@ -60,6 +61,7 @@ struct vhost_user_socket {
char *path;
int listenfd;
bool is_server;
+ bool reconnect;
};
struct vhost_user_connection {
@@ -79,6 +81,7 @@ struct vhost_user {
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static int vhost_user_create_client(struct vhost_user_socket *vsocket);
static struct vhost_user vhost_user = {
.fdset = {
@@ -305,6 +308,8 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
vid = conn->vid;
ret = read_vhost_message(connfd, &msg);
if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+ struct vhost_user_socket *vsocket = conn->vsocket;
+
if (ret < 0)
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read message failed\n");
@@ -320,6 +325,9 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
free(conn);
vhost_destroy_device(vid);
+ if (vsocket->reconnect)
+ vhost_user_create_client(vsocket);
+
return;
}
@@ -471,6 +479,73 @@ err:
return -1;
}
+struct vhost_user_reconnect {
+ struct sockaddr_un un;
+ int fd;
+ struct vhost_user_socket *vsocket;
+
+ TAILQ_ENTRY(vhost_user_reconnect) next;
+};
+
+TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
+struct vhost_user_reconnect_list {
+ struct vhost_user_reconnect_tailq_list head;
+ pthread_mutex_t mutex;
+};
+
+static struct vhost_user_reconnect_list reconn_list;
+static pthread_t reconn_tid;
+
+static void *
+vhost_user_client_reconnect(void *arg __rte_unused)
+{
+ struct vhost_user_reconnect *reconn, *next;
+
+ while (1) {
+ pthread_mutex_lock(&reconn_list.mutex);
+
+ /*
+ * An equal implementation of TAILQ_FOREACH_SAFE,
+ * which does not exist on all platforms.
+ */
+ for (reconn = TAILQ_FIRST(&reconn_list.head);
+ reconn != NULL; reconn = next) {
+ next = TAILQ_NEXT(reconn, next);
+
+ if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
+ sizeof(reconn->un)) < 0)
+ continue;
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "%s: connected\n", reconn->vsocket->path);
+ vhost_user_add_connection(reconn->fd, reconn->vsocket);
+ TAILQ_REMOVE(&reconn_list.head, reconn, next);
+ free(reconn);
+ }
+
+ pthread_mutex_unlock(&reconn_list.mutex);
+ sleep(1);
+ }
+
+ return NULL;
+}
+
+static int
+vhost_user_reconnect_init(void)
+{
+ int ret;
+
+ pthread_mutex_init(&reconn_list.mutex, NULL);
+ TAILQ_INIT(&reconn_list.head);
+
+ ret = pthread_create(&reconn_tid, NULL,
+ vhost_user_client_reconnect, NULL);
+ if (ret < 0)
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
+
+ return ret;
+}
+
static int
vhost_user_create_client(struct vhost_user_socket *vsocket)
{
@@ -478,20 +553,35 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
int ret;
struct sockaddr_un un;
const char *path = vsocket->path;
+ struct vhost_user_reconnect *reconn;
fd = create_unix_socket(path, &un, vsocket->is_server);
if (fd < 0)
return -1;
ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
- path, strerror(errno));
+ if (ret == 0) {
+ vhost_user_add_connection(fd, vsocket);
+ return 0;
+ }
+
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to connect to %s: %s\n",
+ path, strerror(errno));
+
+ if (!vsocket->reconnect) {
close(fd);
return -1;
}
- vhost_user_add_connection(fd, vsocket);
+ RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
+ reconn = malloc(sizeof(*reconn));
+ reconn->un = un;
+ reconn->fd = fd;
+ reconn->vsocket = vsocket;
+ pthread_mutex_lock(&reconn_list.mutex);
+ TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
+ pthread_mutex_unlock(&reconn_list.mutex);
return 0;
}
@@ -525,6 +615,11 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->path = strdup(path);
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ vsocket->reconnect = !!(flags & RTE_VHOST_USER_RECONNECT);
+ if (vsocket->reconnect && reconn_tid == 0) {
+ if (vhost_user_reconnect_init() < 0)
+ goto out;
+ }
ret = vhost_user_create_client(vsocket);
} else {
vsocket->is_server = true;
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 4/6] vhost: workaround stale vring base
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
` (2 preceding siblings ...)
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 3/6] vhost: add reconnect ability Yuanhan Liu
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
` (2 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu,
Michael S. Tsirkin
When the DPDK app crashes (or quits, or gets killed), a restarted DPDK
app would get a stale vring base from QEMU. That would break the kernel
virtio net completely, leaving it non-functional until a driver reset
is done.
So, instead of taking the stale vring base from QEMU, Huawei suggested
we could derive a much saner (though maybe not the most accurate) vring
base from used->idx. That works because:
- there is a memory barrier between updating the used ring entries and
used->idx. So, even if we crashed while updating the used ring
entries, it causes no issue, as the guest driver will not process
those stale used entries while used->idx has not yet been updated.
- DPDK processes the vring in order, which means a crash may merely lead
to some packet retransmission for Tx and drops for Rx.
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Suggested-by: Huawei Xie <huawei.xie@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: "Michael S. Tsirkin" <mst@redhat.com>
Acked-by: Huawei Xie <huawei.xie@intel.com>
---
v2: log on packets resent for Tx and drop for Rx
---
lib/librte_vhost/virtio-net.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 835ab3a..bd7e55e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -561,6 +561,15 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
return -1;
}
+ if (vq->last_used_idx != vq->used->idx) {
+ RTE_LOG(WARNING, VHOST_CONFIG,
+ "last_used_idx (%u) and vq->used->idx (%u) mismatches; "
+ "some packets maybe resent for Tx and dropped for Rx\n",
+ vq->last_used_idx, vq->used->idx);
+ vq->last_used_idx = vq->used->idx;
+ vq->last_used_idx_res = vq->used->idx;
+ }
+
vq->log_guest_addr = addr->log_guest_addr;
LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 5/6] examples/vhost: add client and reconnect option
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
` (3 preceding siblings ...)
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 4/6] vhost: workaround stale vring base Yuanhan Liu
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 6/6] vhost: add pmd " Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Add --client and --reconnect options to enable client mode and
reconnect mode, respectively. --reconnect works only when --client
is given as well.
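As a hypothetical invocation (not part of this patch; the EAL options,
portmask and socket path are placeholders only), enabling both options
might look like:
    ./vhost-switch <EAL options> -- -p 0x1 --dev-basename /tmp/sock0 --client --reconnect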
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
examples/vhost/main.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 1391cd6..5048d23 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -132,6 +132,9 @@ static uint32_t enable_tx_csum;
/* Disable TSO offload */
static uint32_t enable_tso;
+static int client_mode;
+static int reconnect;
+
/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
@@ -458,7 +461,9 @@ us_vhost_usage(const char *prgname)
" --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
" --dev-basename: The basename to be used for the character device.\n"
" --tx-csum [0|1] disable/enable TX checksum offload.\n"
- " --tso [0|1] disable/enable TCP segment offload.\n",
+ " --tso [0|1] disable/enable TCP segment offload.\n"
+ " --client register a vhost-user socket as client mode.\n"
+ " --reconnect reconnect to vhost-user server when disconnects.\n",
prgname);
}
@@ -483,6 +488,8 @@ us_vhost_parse_args(int argc, char **argv)
{"dev-basename", required_argument, NULL, 0},
{"tx-csum", required_argument, NULL, 0},
{"tso", required_argument, NULL, 0},
+ {"client", no_argument, &client_mode, 1},
+ {"reconnect", no_argument, &reconnect, 1},
{NULL, 0, 0, 0},
};
@@ -646,6 +653,12 @@ us_vhost_parse_args(int argc, char **argv)
}
}
+ if (reconnect && !client_mode) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "--reconnect works only when --client is specified\n");
+ return -1;
+ }
+
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
if (enabled_port_mask & (1 << i))
ports[num_ports++] = (uint8_t)i;
@@ -1403,6 +1416,7 @@ main(int argc, char *argv[])
uint8_t portid;
static pthread_t tid;
char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ uint64_t flags = 0;
signal(SIGINT, sigint_handler);
@@ -1495,8 +1509,13 @@ main(int argc, char *argv[])
if (mergeable == 0)
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
+ if (client_mode)
+ flags |= RTE_VHOST_USER_CLIENT;
+ if (reconnect)
+ flags |= RTE_VHOST_USER_RECONNECT;
+
/* Register vhost(cuse or user) driver to handle vhost messages. */
- ret = rte_vhost_driver_register(dev_basename, 0);
+ ret = rte_vhost_driver_register(dev_basename, flags);
if (ret != 0)
rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v2 6/6] vhost: add pmd client and reconnect option
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
` (4 preceding siblings ...)
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
@ 2016-05-13 6:16 ` Yuanhan Liu
2016-05-25 17:45 ` Rich Lane
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
6 siblings, 1 reply; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-13 6:16 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Add client and reconnect options to the vhost PMD. reconnect only works
when client is given as well.
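As a sketch of the intended usage (the device index, socket path and
queue count below are illustrative only), the PMD could then be
instantiated with something like:
    --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1,client=1,reconnect=1'
where client and reconnect map to the new kvargs added below.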
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/vhost/rte_eth_vhost.c | 54 ++++++++++++++++++++++++++++++---------
1 file changed, 42 insertions(+), 12 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 91b5d95..c7e03d0 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -50,12 +50,16 @@
#define ETH_VHOST_IFACE_ARG "iface"
#define ETH_VHOST_QUEUES_ARG "queues"
+#define ETH_VHOST_CLIENT_ARG "client"
+#define ETH_VHOST_RECONNECT_ARG "reconnect"
static const char *drivername = "VHOST PMD";
static const char *valid_arguments[] = {
ETH_VHOST_IFACE_ARG,
ETH_VHOST_QUEUES_ARG,
+ ETH_VHOST_CLIENT_ARG,
+ ETH_VHOST_RECONNECT_ARG,
NULL
};
@@ -89,6 +93,7 @@ struct pmd_internal {
char *dev_name;
char *iface_name;
uint16_t max_queues;
+ uint64_t flags;
volatile uint16_t once;
};
@@ -467,7 +472,8 @@ eth_dev_start(struct rte_eth_dev *dev)
int ret = 0;
if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
- ret = rte_vhost_driver_register(internal->iface_name, 0);
+ ret = rte_vhost_driver_register(internal->iface_name,
+ internal->flags);
if (ret)
return ret;
}
@@ -672,7 +678,7 @@ static const struct eth_dev_ops ops = {
static int
eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
- const unsigned numa_node)
+ const unsigned numa_node, uint64_t flags)
{
struct rte_eth_dev_data *data = NULL;
struct pmd_internal *internal = NULL;
@@ -729,6 +735,7 @@ eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
internal->iface_name = strdup(iface_name);
if (internal->iface_name == NULL)
goto error;
+ internal->flags = flags;
list->eth_dev = eth_dev;
pthread_mutex_lock(&internal_list_lock);
@@ -793,18 +800,15 @@ open_iface(const char *key __rte_unused, const char *value, void *extra_args)
}
static inline int
-open_queues(const char *key __rte_unused, const char *value, void *extra_args)
+open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
- uint16_t *q = extra_args;
+ uint16_t *n = extra_args;
if (value == NULL || extra_args == NULL)
return -EINVAL;
- *q = (uint16_t)strtoul(value, NULL, 0);
- if (*q == USHRT_MAX && errno == ERANGE)
- return -1;
-
- if (*q > RTE_MAX_QUEUES_PER_PORT)
+ *n = (uint16_t)strtoul(value, NULL, 0);
+ if (*n == USHRT_MAX && errno == ERANGE)
return -1;
return 0;
@@ -817,6 +821,9 @@ rte_pmd_vhost_devinit(const char *name, const char *params)
int ret = 0;
char *iface_name;
uint16_t queues;
+ uint64_t flags = 0;
+ int client_mode;
+ int reconnect;
RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);
@@ -836,14 +843,37 @@ rte_pmd_vhost_devinit(const char *name, const char *params)
if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
- &open_queues, &queues);
- if (ret < 0)
+ &open_int, &queues);
+ if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
goto out_free;
} else
queues = 1;
- eth_dev_vhost_create(name, iface_name, queues, rte_socket_id());
+ if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
+ &open_int, &client_mode);
+ if (ret < 0)
+ goto out_free;
+ }
+ if (rte_kvargs_count(kvlist, ETH_VHOST_RECONNECT_ARG) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_RECONNECT_ARG,
+ &open_int, &reconnect);
+ if (ret < 0)
+ goto out_free;
+ }
+ if (client_mode)
+ flags |= RTE_VHOST_USER_CLIENT;
+ if (reconnect)
+ flags |= RTE_VHOST_USER_RECONNECT;
+ if (reconnect && !client_mode) {
+ RTE_LOG(ERR, PMD,
+ "reconnect works only when client is specified\n");
+ ret = -1;
+ goto out_free;
+ }
+
+ eth_dev_vhost_create(name, iface_name, queues, rte_socket_id(), flags);
out_free:
rte_kvargs_free(kvlist);
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH v2 6/6] vhost: add pmd client and reconnect option
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 6/6] vhost: add pmd " Yuanhan Liu
@ 2016-05-25 17:45 ` Rich Lane
2016-05-26 8:01 ` Yuanhan Liu
0 siblings, 1 reply; 50+ messages in thread
From: Rich Lane @ 2016-05-25 17:45 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: dev, huawei.xie, Traynor Kevin, marcandre.lureau
>
> @@ -817,6 +821,9 @@ rte_pmd_vhost_devinit(const char *name, const char
> *params)
> int ret = 0;
> char *iface_name;
> uint16_t queues;
> + uint64_t flags = 0;
> + int client_mode;
> + int reconnect;
>
client_mode and reconnect are not initialized if the arguments aren't
passed.
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH v2 6/6] vhost: add pmd client and reconnect option
2016-05-25 17:45 ` Rich Lane
@ 2016-05-26 8:01 ` Yuanhan Liu
0 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-05-26 8:01 UTC (permalink / raw)
To: Rich Lane; +Cc: dev, huawei.xie, Traynor Kevin, marcandre.lureau
On Wed, May 25, 2016 at 10:45:10AM -0700, Rich Lane wrote:
> @@ -817,6 +821,9 @@ rte_pmd_vhost_devinit(const char *name, const char
> *params)
> int ret = 0;
> char *iface_name;
> uint16_t queues;
> + uint64_t flags = 0;
> + int client_mode;
> + int reconnect;
>
>
> client_mode and reconnect are not initialized if the arguments aren't passed.
Right. Thanks for catching it. I'm wondering why GCC doesn't catch it.
--yliu
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
` (5 preceding siblings ...)
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 6/6] vhost: add pmd " Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
` (6 more replies)
6 siblings, 7 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
v3: - make the "reconnect" feature the default for client mode, as it
helps handle guest OS restarts with less effort.
- fix the not-initialized variable error pointed out by Rich
NOTE: I created a branch at dpdk.org [0] for more convenient testing:
[0]: git://dpdk.org/next/dpdk-next-virtio for-testing
When the DPDK vhost-user application (such as OVS) restarts (due to a
crash, or an update), the vhost-user connection between DPDK and QEMU
won't be established again automatically. In other words, the virtio
net is broken.
The reason it doesn't work is that DPDK acts as server only so far.
A restart of the server needs a reconnection from the client (QEMU).
However, reconnecting is not supported by QEMU.
Adding support for client mode and letting DPDK be the client would
resolve the above issue more easily: a restart of DPDK would naturally
try to connect to the server (QEMU) again automatically.
Therefore, this patchset implements the DPDK vhost-user client mode, by
introducing a new arg (flags) for API rte_vhost_driver_register(). And the
client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note that this
implies an API breakage. However, since this release deals with ABI/API
refactoring, it should not be an issue.
Another interesting aspect of making it work is that you not only have
to consider the case where the DPDK vhost-user app restarts, but also
the case where QEMU restarts: the guest OS sometimes just reboots. In
such a case, when the server is down, the client has to keep
reconnecting to the server until the server is back and the connection
is established again. And that's what the "reconnect" patch is for.
Note that current QEMU does not support a second connection from the
client, thus a restart of DPDK vhost-user will not work yet. This is
because current QEMU won't be able to detect the disconnect caused by
the restart, thus it will not listen for later connections. Patches [1]
have been sent; they are just not merged yet. But unlike the vhost-user
multiple queue case, where we had a critical dependency on the QEMU
implementation, here we have no such dependency; therefore, I think
it's okay to make DPDK ready for the "reconnect" support first. (Note
that I also mentioned this fact in the release doc.)
[1]: http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
v2: - added release doc
- do not remove socket file for the client mode
- create one thread ony to handle all reconnects
Thanks.
--yliu
---
Yuanhan Liu (6):
vhost: rename structs for enabling client mode
vhost: add vhost-user client mode
vhost: add reconnect ability
vhost: workaround stale vring base
examples/vhost: add client option
vhost: add pmd client option
doc/guides/rel_notes/release_16_07.rst | 21 ++
drivers/net/vhost/rte_eth_vhost.c | 38 ++-
examples/vhost/main.c | 12 +-
lib/librte_vhost/rte_virtio_net.h | 12 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 403 ++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
lib/librte_vhost/virtio-net.c | 9 +
8 files changed, 361 insertions(+), 148 deletions(-)
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 1/6] vhost: rename structs for enabling client mode
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 2/6] vhost: add vhost-user " Yuanhan Liu
` (5 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
DPDK vhost-user acts as server only so far, so using a struct named
"vhost_server" is okay. However, once we add client mode, it doesn't
make sense any more. Rename it to "vhost_user_socket".
There was nothing obviously wrong with "connfd_ctx", but it's clearly
better to rename it to "vhost_user_connection", as it does represent
a connection: a connection between the backend (DPDK) and the frontend
(QEMU).
Similarly, a few more renames are made, such as "vserver_new_vq_conn"
to "vhost_user_new_connection".
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
lib/librte_vhost/vhost_user/vhost-net-user.c | 131 ++++++++++++++-------------
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 --
2 files changed, 70 insertions(+), 67 deletions(-)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 68fc9b9..f485a3b 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -51,32 +51,41 @@
#include "vhost-net.h"
#include "virtio-net-user.h"
-#define MAX_VIRTIO_BACKLOG 128
-
-static void vserver_new_vq_conn(int fd, void *data, int *remove);
-static void vserver_message_handler(int fd, void *dat, int *remove);
+/*
+ * Every time rte_vhost_driver_register() is invoked, an associated
+ * vhost_user_socket struct will be created.
+ */
+struct vhost_user_socket {
+ char *path;
+ int listenfd;
+};
-struct connfd_ctx {
- struct vhost_server *vserver;
+struct vhost_user_connection {
+ struct vhost_user_socket *vsocket;
int vid;
};
-#define MAX_VHOST_SERVER 1024
-struct _vhost_server {
- struct vhost_server *server[MAX_VHOST_SERVER];
+#define MAX_VHOST_SOCKET 1024
+struct vhost_user {
+ struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
struct fdset fdset;
- int vserver_cnt;
- pthread_mutex_t server_mutex;
+ int vsocket_cnt;
+ pthread_mutex_t mutex;
};
-static struct _vhost_server g_vhost_server = {
+#define MAX_VIRTIO_BACKLOG 128
+
+static void vhost_user_new_connection(int fd, void *data, int *remove);
+static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+
+static struct vhost_user vhost_user = {
.fdset = {
.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
- .vserver_cnt = 0,
- .server_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .vsocket_cnt = 0,
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
};
static const char *vhost_message_str[VHOST_USER_MAX] = {
@@ -278,13 +287,13 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
return ret;
}
-/* call back when there is new virtio connection. */
+/* call back when there is new vhost-user connection. */
static void
-vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
{
- struct vhost_server *vserver = (struct vhost_server *)dat;
+ struct vhost_user_socket *vsocket = dat;
int conn_fd;
- struct connfd_ctx *ctx;
+ struct vhost_user_connection *conn;
int vid;
unsigned int size;
@@ -294,41 +303,41 @@ vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
if (conn_fd < 0)
return;
- ctx = calloc(1, sizeof(*ctx));
- if (ctx == NULL) {
+ conn = calloc(1, sizeof(*conn));
+ if (conn == NULL) {
close(conn_fd);
return;
}
vid = vhost_new_device();
if (vid == -1) {
- free(ctx);
+ free(conn);
close(conn_fd);
return;
}
- size = strnlen(vserver->path, PATH_MAX);
- vhost_set_ifname(vid, vserver->path, size);
+ size = strnlen(vsocket->path, PATH_MAX);
+ vhost_set_ifname(vid, vsocket->path, size);
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
- ctx->vserver = vserver;
- ctx->vid = vid;
- fdset_add(&g_vhost_server.fdset,
- conn_fd, vserver_message_handler, NULL, ctx);
+ conn->vsocket = vsocket;
+ conn->vid = vid;
+ fdset_add(&vhost_user.fdset,
+ conn_fd, vhost_user_msg_handler, NULL, conn);
}
/* callback when there is message on the connfd */
static void
-vserver_message_handler(int connfd, void *dat, int *remove)
+vhost_user_msg_handler(int connfd, void *dat, int *remove)
{
int vid;
- struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+ struct vhost_user_connection *conn = dat;
struct VhostUserMsg msg;
uint64_t features;
int ret;
- vid = cfd_ctx->vid;
+ vid = conn->vid;
ret = read_vhost_message(connfd, &msg);
if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
if (ret < 0)
@@ -343,7 +352,7 @@ vserver_message_handler(int connfd, void *dat, int *remove)
close(connfd);
*remove = 1;
- free(cfd_ctx);
+ free(conn);
vhost_destroy_device(vid);
return;
@@ -449,37 +458,37 @@ vserver_message_handler(int connfd, void *dat, int *remove)
int
rte_vhost_driver_register(const char *path)
{
- struct vhost_server *vserver;
+ struct vhost_user_socket *vsocket;
- pthread_mutex_lock(&g_vhost_server.server_mutex);
+ pthread_mutex_lock(&vhost_user.mutex);
- if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
RTE_LOG(ERR, VHOST_CONFIG,
"error: the number of servers reaches maximum\n");
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver = calloc(sizeof(struct vhost_server), 1);
- if (vserver == NULL) {
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vsocket = calloc(sizeof(struct vhost_user_socket), 1);
+ if (vsocket == NULL) {
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver->listenfd = uds_socket(path);
- if (vserver->listenfd < 0) {
- free(vserver);
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vsocket->listenfd = uds_socket(path);
+ if (vsocket->listenfd < 0) {
+ free(vsocket);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
- vserver->path = strdup(path);
+ vsocket->path = strdup(path);
- fdset_add(&g_vhost_server.fdset, vserver->listenfd,
- vserver_new_vq_conn, NULL, vserver);
+ fdset_add(&vhost_user.fdset, vsocket->listenfd,
+ vhost_user_new_connection, NULL, vsocket);
- g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+ pthread_mutex_unlock(&vhost_user.mutex);
return 0;
}
@@ -494,28 +503,28 @@ rte_vhost_driver_unregister(const char *path)
int i;
int count;
- pthread_mutex_lock(&g_vhost_server.server_mutex);
+ pthread_mutex_lock(&vhost_user.mutex);
- for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
- if (!strcmp(g_vhost_server.server[i]->path, path)) {
- fdset_del(&g_vhost_server.fdset,
- g_vhost_server.server[i]->listenfd);
+ for (i = 0; i < vhost_user.vsocket_cnt; i++) {
+ if (!strcmp(vhost_user.vsockets[i]->path, path)) {
+ fdset_del(&vhost_user.fdset,
+ vhost_user.vsockets[i]->listenfd);
- close(g_vhost_server.server[i]->listenfd);
- free(g_vhost_server.server[i]->path);
- free(g_vhost_server.server[i]);
+ close(vhost_user.vsockets[i]->listenfd);
+ free(vhost_user.vsockets[i]->path);
+ free(vhost_user.vsockets[i]);
unlink(path);
- count = --g_vhost_server.vserver_cnt;
- g_vhost_server.server[i] = g_vhost_server.server[count];
- g_vhost_server.server[count] = NULL;
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ count = --vhost_user.vsocket_cnt;
+ vhost_user.vsockets[i] = vhost_user.vsockets[count];
+ vhost_user.vsockets[count] = NULL;
+ pthread_mutex_unlock(&vhost_user.mutex);
return 0;
}
}
- pthread_mutex_unlock(&g_vhost_server.server_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
return -1;
}
@@ -523,6 +532,6 @@ rte_vhost_driver_unregister(const char *path)
int
rte_vhost_driver_session_start(void)
{
- fdset_event_dispatch(&g_vhost_server.fdset);
+ fdset_event_dispatch(&vhost_user.fdset);
return 0;
}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
index ec68b96..f533239 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -38,12 +38,6 @@
#include <linux/vhost.h>
#include "rte_virtio_net.h"
-#include "fd_man.h"
-
-struct vhost_server {
- char *path; /**< The path the uds is bind to. */
- int listenfd; /**< The listener sockfd. */
-};
/* refer to hw/virtio/vhost-user.c */
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 2/6] vhost: add vhost-user client mode
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 3/6] vhost: add reconnect ability Yuanhan Liu
` (4 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Add a new parameter (flags) to rte_vhost_driver_register(). DPDK
vhost-user acts as a client when the RTE_VHOST_USER_CLIENT flag
is set. The flags argument would also allow future extensions without
breaking the API (again).
The rest is straightforward then: allocate a unix socket, and
bind/listen for the server, connect for the client.
This extension is for vhost-user only, therefore we simply quit
and report an error when any flags are given for vhost-cuse.
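A minimal usage sketch (not part of this patch; the socket path is
illustrative and error handling is omitted) showing how an application
would pass the new flags argument:
    /* needs: #include <rte_virtio_net.h> */
    /* server mode, same behaviour as before (no flags set) */
    ret = rte_vhost_driver_register("/tmp/sock0", 0);
    /* client mode: DPDK connect()s to a socket created by QEMU */
    ret = rte_vhost_driver_register("/tmp/sock0", RTE_VHOST_USER_CLIENT);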
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: - add release doc
- do not remove socket file for client mode
---
doc/guides/rel_notes/release_16_07.rst | 14 ++
drivers/net/vhost/rte_eth_vhost.c | 2 +-
examples/vhost/main.c | 2 +-
lib/librte_vhost/rte_virtio_net.h | 11 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 227 ++++++++++++++++-----------
6 files changed, 170 insertions(+), 94 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 8dbcf8a..b997a20 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -47,6 +47,20 @@ New Features
* Dropped specific Xen Dom0 code.
* Dropped specific anonymous mempool code in testpmd.
+* **Added vhost-user client mode.**
+
+ DPDK vhost-user could be the server as well as the client. It supports
+ server mode only before, now it also supports client mode. Client mode
+ is enabled when ``RTE_VHOST_USER_CLIENT`` flag is set while calling
+ ``rte_vhost_driver_register``.
+
+ When DPDK vhost-user restarts from normal or abnormal quit (say crash),
+ the client mode would allow DPDK to establish the connect again. Note
+ that a brand new QEMU version is needed to make it work: current QEMU
+ simply can not accept the connection from DPDK vhost-user restart. At
+ the time of writing this, patches for QEMU are not merged yet, but it's
+ likely to get merged in v2.7.
+
Resolved Issues
---------------
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 56c1c36..91b5d95 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -467,7 +467,7 @@ eth_dev_start(struct rte_eth_dev *dev)
int ret = 0;
if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
- ret = rte_vhost_driver_register(internal->iface_name);
+ ret = rte_vhost_driver_register(internal->iface_name, 0);
if (ret)
return ret;
}
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index c854660..9406d94 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1488,7 +1488,7 @@ main(int argc, char *argv[])
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
/* Register vhost(cuse or user) driver to handle vhost messages. */
- ret = rte_vhost_driver_register((char *)&dev_basename);
+ ret = rte_vhost_driver_register(dev_basename, 0);
if (ret != 0)
rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index bc2b74b..5f69e78 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -51,6 +51,8 @@
#include <rte_mempool.h>
#include <rte_ether.h>
+#define RTE_VHOST_USER_CLIENT (1ULL << 0)
+
/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
@@ -85,11 +87,14 @@ uint64_t rte_vhost_feature_get(void);
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
-/* Register vhost driver. dev_name could be different for multiple instance support. */
-int rte_vhost_driver_register(const char *dev_name);
+/**
+ * Register vhost driver. path could be different for multiple
+ * instance support.
+ */
+int rte_vhost_driver_register(const char *path, uint64_t flags);
/* Unregister vhost driver. This is only meaningful to vhost user. */
-int rte_vhost_driver_unregister(const char *dev_name);
+int rte_vhost_driver_unregister(const char *path);
/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index cf6d191..5d15011 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -352,7 +352,7 @@ static const struct cuse_lowlevel_ops vhost_net_ops = {
* vhost_net_device_ops are also passed when the device is registered in app.
*/
int
-rte_vhost_driver_register(const char *dev_name)
+rte_vhost_driver_register(const char *dev_name, uint64_t flags)
{
struct cuse_info cuse_info;
char device_name[PATH_MAX] = "";
@@ -364,6 +364,12 @@ rte_vhost_driver_register(const char *dev_name)
char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+ if (flags) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost-cuse does not support any flags so far\n");
+ return -1;
+ }
+
if (access(cuse_device_name, R_OK | W_OK) < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"char device %s can't be accessed, maybe not exist\n",
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f485a3b..a480f9f 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -33,6 +33,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
@@ -58,6 +59,7 @@
struct vhost_user_socket {
char *path;
int listenfd;
+ bool is_server;
};
struct vhost_user_connection {
@@ -75,7 +77,7 @@ struct vhost_user {
#define MAX_VIRTIO_BACKLOG 128
-static void vhost_user_new_connection(int fd, void *data, int *remove);
+static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
static struct vhost_user vhost_user = {
@@ -111,48 +113,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
};
-/**
- * Create a unix domain socket, bind to path and listen for connection.
- * @return
- * socket fd or -1 on failure
- */
-static int
-uds_socket(const char *path)
-{
- struct sockaddr_un un;
- int sockfd;
- int ret;
-
- if (path == NULL)
- return -1;
-
- sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sockfd < 0)
- return -1;
- RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
-
- memset(&un, 0, sizeof(un));
- un.sun_family = AF_UNIX;
- snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
- ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
- if (ret == -1) {
- RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
- sockfd, path);
- goto err;
- }
- RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
-
- ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
- if (ret == -1)
- goto err;
-
- return sockfd;
-
-err:
- close(sockfd);
- return -1;
-}
-
/* return bytes# of read on success or negative val on failure. */
static int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
@@ -287,32 +247,24 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
return ret;
}
-/* call back when there is new vhost-user connection. */
+
static void
-vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
+vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
- struct vhost_user_socket *vsocket = dat;
- int conn_fd;
- struct vhost_user_connection *conn;
int vid;
- unsigned int size;
-
- conn_fd = accept(fd, NULL, NULL);
- RTE_LOG(INFO, VHOST_CONFIG,
- "new virtio connection is %d\n", conn_fd);
- if (conn_fd < 0)
- return;
+ size_t size;
+ struct vhost_user_connection *conn;
- conn = calloc(1, sizeof(*conn));
+ conn = malloc(sizeof(*conn));
if (conn == NULL) {
- close(conn_fd);
+ close(fd);
return;
}
vid = vhost_new_device();
if (vid == -1) {
+ close(fd);
free(conn);
- close(conn_fd);
return;
}
@@ -323,8 +275,21 @@ vhost_user_new_connection(int fd, void *dat, int *remove __rte_unused)
conn->vsocket = vsocket;
conn->vid = vid;
- fdset_add(&vhost_user.fdset,
- conn_fd, vhost_user_msg_handler, NULL, conn);
+ fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
+}
+
+/* call back when there is new vhost-user connection from client */
+static void
+vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
+{
+ struct vhost_user_socket *vsocket = dat;
+
+ fd = accept(fd, NULL, NULL);
+ if (fd < 0)
+ return;
+
+ RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
+ vhost_user_add_connection(fd, vsocket);
}
/* callback when there is message on the connfd */
@@ -452,50 +417,135 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
}
}
-/**
- * Creates and initialise the vhost server.
- */
-int
-rte_vhost_driver_register(const char *path)
+static int
+create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
{
- struct vhost_user_socket *vsocket;
+ int fd;
- pthread_mutex_lock(&vhost_user.mutex);
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -1;
+ RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
+ is_server ? "server" : "client", fd);
- if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "error: the number of servers reaches maximum\n");
- pthread_mutex_unlock(&vhost_user.mutex);
+ memset(un, 0, sizeof(*un));
+ un->sun_family = AF_UNIX;
+ strncpy(un->sun_path, path, sizeof(un->sun_path));
+
+ return fd;
+}
+
+static int
+vhost_user_create_server(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
return -1;
+
+ ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to bind to %s: %s; remove it and try again\n",
+ path, strerror(errno));
+ goto err;
}
+ RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
- vsocket = calloc(sizeof(struct vhost_user_socket), 1);
- if (vsocket == NULL) {
- pthread_mutex_unlock(&vhost_user.mutex);
+ ret = listen(fd, MAX_VIRTIO_BACKLOG);
+ if (ret < 0)
+ goto err;
+
+ vsocket->listenfd = fd;
+ fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
+ NULL, vsocket);
+
+ return 0;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static int
+vhost_user_create_client(struct vhost_user_socket *vsocket)
+{
+ int fd;
+ int ret;
+ struct sockaddr_un un;
+ const char *path = vsocket->path;
+
+ fd = create_unix_socket(path, &un, vsocket->is_server);
+ if (fd < 0)
+ return -1;
+
+ ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
+ path, strerror(errno));
+ close(fd);
return -1;
}
- vsocket->listenfd = uds_socket(path);
- if (vsocket->listenfd < 0) {
- free(vsocket);
- pthread_mutex_unlock(&vhost_user.mutex);
+ vhost_user_add_connection(fd, vsocket);
+
+ return 0;
+}
+
+/*
+ * Register a new vhost-user socket; here we could act as server
+ * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
+ * is set.
+ */
+int
+rte_vhost_driver_register(const char *path, uint64_t flags)
+{
+ int ret = -1;
+ struct vhost_user_socket *vsocket;
+
+ if (!path)
return -1;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+
+ if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: the number of vhost sockets reaches maximum\n");
+ goto out;
}
+ vsocket = malloc(sizeof(struct vhost_user_socket));
+ if (!vsocket)
+ goto out;
+ memset(vsocket, 0, sizeof(struct vhost_user_socket));
vsocket->path = strdup(path);
- fdset_add(&vhost_user.fdset, vsocket->listenfd,
- vhost_user_new_connection, NULL, vsocket);
+ if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ ret = vhost_user_create_client(vsocket);
+ } else {
+ vsocket->is_server = true;
+ ret = vhost_user_create_server(vsocket);
+ }
+ if (ret < 0) {
+ free(vsocket->path);
+ free(vsocket);
+ goto out;
+ }
vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+
+out:
pthread_mutex_unlock(&vhost_user.mutex);
- return 0;
+ return ret;
}
-
/**
- * Unregister the specified vhost server
+ * Unregister the specified vhost socket
*/
int
rte_vhost_driver_unregister(const char *path)
@@ -507,15 +557,16 @@ rte_vhost_driver_unregister(const char *path)
for (i = 0; i < vhost_user.vsocket_cnt; i++) {
if (!strcmp(vhost_user.vsockets[i]->path, path)) {
- fdset_del(&vhost_user.fdset,
- vhost_user.vsockets[i]->listenfd);
+ if (vhost_user.vsockets[i]->is_server) {
+ fdset_del(&vhost_user.fdset,
+ vhost_user.vsockets[i]->listenfd);
+ close(vhost_user.vsockets[i]->listenfd);
+ unlink(path);
+ }
- close(vhost_user.vsockets[i]->listenfd);
free(vhost_user.vsockets[i]->path);
free(vhost_user.vsockets[i]);
- unlink(path);
-
count = --vhost_user.vsocket_cnt;
vhost_user.vsockets[i] = vhost_user.vsockets[count];
vhost_user.vsockets[count] = NULL;
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 3/6] vhost: add reconnect ability
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 2/6] vhost: add vhost-user " Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 4/6] vhost: workaround stale vring base Yuanhan Liu
` (3 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Allow reconnecting on failure by default when:
- the DPDK app starts first and QEMU (as the server) is not started yet.
Without reconnecting, the DPDK app would simply fail on vhost-user
registration.
- QEMU restarts, say due to an OS reboot.
Without reconnecting, you can't re-establish the connection without
restarting the DPDK app.
This patch makes both of the above cases work. It simply creates a new
thread and keeps calling "connect()" until it succeeds.
Reconnecting can be disabled by setting the RTE_VHOST_USER_NO_RECONNECT
flag.
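A minimal sketch of opting out of the default reconnect behaviour (not
part of this patch; the socket path is illustrative and error handling
is omitted):
    /* client mode with the automatic reconnect explicitly disabled */
    ret = rte_vhost_driver_register("/tmp/sock0",
            RTE_VHOST_USER_CLIENT | RTE_VHOST_USER_NO_RECONNECT);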
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: - create one thread only to handle all reconnects
- update release note on reconnect
v3: - enable reconnect by default, which is a handy ability to have.
---
doc/guides/rel_notes/release_16_07.rst | 7 ++
lib/librte_vhost/rte_virtio_net.h | 1 +
lib/librte_vhost/vhost_user/vhost-net-user.c | 103 +++++++++++++++++++++++++--
3 files changed, 107 insertions(+), 4 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index b997a20..107ed83 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -61,6 +61,13 @@ New Features
the time of writing this, patches for QEMU are not merged yet, but it's
likely to get merged in v2.7.
+ DPDK vhost-user will also try to reconnect by default when
+
+ * the first connect fails (when QEMU is not started yet)
+ * the connection is broken (when QEMU restarts)
+
+ It can be turned off if flag ``RTE_VHOST_USER_NO_RECONNECT`` is set.
+
Resolved Issues
---------------
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 5f69e78..9caa622 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -52,6 +52,7 @@
#include <rte_ether.h>
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
+#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index a480f9f..94f1b92 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -41,6 +41,7 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/queue.h>
#include <errno.h>
#include <pthread.h>
@@ -60,6 +61,7 @@ struct vhost_user_socket {
char *path;
int listenfd;
bool is_server;
+ bool reconnect;
};
struct vhost_user_connection {
@@ -79,6 +81,7 @@ struct vhost_user {
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static int vhost_user_create_client(struct vhost_user_socket *vsocket);
static struct vhost_user vhost_user = {
.fdset = {
@@ -305,6 +308,8 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
vid = conn->vid;
ret = read_vhost_message(connfd, &msg);
if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+ struct vhost_user_socket *vsocket = conn->vsocket;
+
if (ret < 0)
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read message failed\n");
@@ -320,6 +325,9 @@ vhost_user_msg_handler(int connfd, void *dat, int *remove)
free(conn);
vhost_destroy_device(vid);
+ if (vsocket->reconnect)
+ vhost_user_create_client(vsocket);
+
return;
}
@@ -471,6 +479,73 @@ err:
return -1;
}
+struct vhost_user_reconnect {
+ struct sockaddr_un un;
+ int fd;
+ struct vhost_user_socket *vsocket;
+
+ TAILQ_ENTRY(vhost_user_reconnect) next;
+};
+
+TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
+struct vhost_user_reconnect_list {
+ struct vhost_user_reconnect_tailq_list head;
+ pthread_mutex_t mutex;
+};
+
+static struct vhost_user_reconnect_list reconn_list;
+static pthread_t reconn_tid;
+
+static void *
+vhost_user_client_reconnect(void *arg __rte_unused)
+{
+ struct vhost_user_reconnect *reconn, *next;
+
+ while (1) {
+ pthread_mutex_lock(&reconn_list.mutex);
+
+ /*
+ * An equal implementation of TAILQ_FOREACH_SAFE,
+ * which does not exist on all platforms.
+ */
+ for (reconn = TAILQ_FIRST(&reconn_list.head);
+ reconn != NULL; reconn = next) {
+ next = TAILQ_NEXT(reconn, next);
+
+ if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
+ sizeof(reconn->un)) < 0)
+ continue;
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "%s: connected\n", reconn->vsocket->path);
+ vhost_user_add_connection(reconn->fd, reconn->vsocket);
+ TAILQ_REMOVE(&reconn_list.head, reconn, next);
+ free(reconn);
+ }
+
+ pthread_mutex_unlock(&reconn_list.mutex);
+ sleep(1);
+ }
+
+ return NULL;
+}
+
+static int
+vhost_user_reconnect_init(void)
+{
+ int ret;
+
+ pthread_mutex_init(&reconn_list.mutex, NULL);
+ TAILQ_INIT(&reconn_list.head);
+
+ ret = pthread_create(&reconn_tid, NULL,
+ vhost_user_client_reconnect, NULL);
+ if (ret < 0)
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
+
+ return ret;
+}
+
static int
vhost_user_create_client(struct vhost_user_socket *vsocket)
{
@@ -478,20 +553,35 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
int ret;
struct sockaddr_un un;
const char *path = vsocket->path;
+ struct vhost_user_reconnect *reconn;
fd = create_unix_socket(path, &un, vsocket->is_server);
if (fd < 0)
return -1;
ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "failed to connect to %s: %s\n",
- path, strerror(errno));
+ if (ret == 0) {
+ vhost_user_add_connection(fd, vsocket);
+ return 0;
+ }
+
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to connect to %s: %s\n",
+ path, strerror(errno));
+
+ if (!vsocket->reconnect) {
close(fd);
return -1;
}
- vhost_user_add_connection(fd, vsocket);
+ RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
+ reconn = malloc(sizeof(*reconn));
+ reconn->un = un;
+ reconn->fd = fd;
+ reconn->vsocket = vsocket;
+ pthread_mutex_lock(&reconn_list.mutex);
+ TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
+ pthread_mutex_unlock(&reconn_list.mutex);
return 0;
}
@@ -525,6 +615,11 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->path = strdup(path);
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+ vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
+ if (vsocket->reconnect && reconn_tid == 0) {
+ if (vhost_user_reconnect_init() < 0)
+ goto out;
+ }
ret = vhost_user_create_client(vsocket);
} else {
vsocket->is_server = true;
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 4/6] vhost: workaround stale vring base
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (2 preceding siblings ...)
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 3/6] vhost: add reconnect ability Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 5/6] examples/vhost: add client option Yuanhan Liu
` (2 subsequent siblings)
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev
Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu,
Michael S. Tsirkin
When the DPDK app crashes (or quits, or gets killed), a restarted DPDK
app would get a stale vring base from QEMU. That would break the kernel
virtio net completely, leaving it non-functional until a driver reset
is done.
So, instead of taking the stale vring base from QEMU, Huawei suggested
we could derive a much saner (though maybe not the most accurate) vring
base from used->idx. That works because:
- there is a memory barrier between updating the used ring entries and
used->idx. So, even if we crashed while updating the used ring
entries, it causes no issue, as the guest driver will not process
those stale used entries while used->idx has not yet been updated.
- DPDK processes the vring in order, which means a crash may merely lead
to some packet retransmission for Tx and drops for Rx.
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Suggested-by: Huawei Xie <huawei.xie@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: "Michael S. Tsirkin" <mst@redhat.com>
Acked-by: Huawei Xie <huawei.xie@intel.com>
---
v2: log on packets resent for Tx and drop for Rx
---
lib/librte_vhost/virtio-net.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 835ab3a..bd7e55e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -561,6 +561,15 @@ vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
return -1;
}
+ if (vq->last_used_idx != vq->used->idx) {
+ RTE_LOG(WARNING, VHOST_CONFIG,
+ "last_used_idx (%u) and vq->used->idx (%u) mismatches; "
+ "some packets maybe resent for Tx and dropped for Rx\n",
+ vq->last_used_idx, vq->used->idx);
+ vq->last_used_idx = vq->used->idx;
+ vq->last_used_idx_res = vq->used->idx;
+ }
+
vq->log_guest_addr = addr->log_guest_addr;
LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 5/6] examples/vhost: add client option
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (3 preceding siblings ...)
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 4/6] vhost: workaround stale vring base Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 6/6] vhost: add pmd " Yuanhan Liu
2016-06-14 12:00 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Add a --client option to let vhost-switch act as the client.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v3: remove the --reconnect opt, as it's enabled by default.
---
examples/vhost/main.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9406d94..bd92ac6 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -132,6 +132,8 @@ static uint32_t enable_tx_csum;
/* Disable TSO offload */
static uint32_t enable_tso;
+static int client_mode;
+
/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
@@ -458,7 +460,8 @@ us_vhost_usage(const char *prgname)
" --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
" --dev-basename: The basename to be used for the character device.\n"
" --tx-csum [0|1] disable/enable TX checksum offload.\n"
- " --tso [0|1] disable/enable TCP segment offload.\n",
+ " --tso [0|1] disable/enable TCP segment offload.\n"
+ " --client register a vhost-user socket as client mode.\n",
prgname);
}
@@ -483,6 +486,7 @@ us_vhost_parse_args(int argc, char **argv)
{"dev-basename", required_argument, NULL, 0},
{"tx-csum", required_argument, NULL, 0},
{"tso", required_argument, NULL, 0},
+ {"client", no_argument, &client_mode, 1},
{NULL, 0, 0, 0},
};
@@ -1397,6 +1401,7 @@ main(int argc, char *argv[])
uint8_t portid;
static pthread_t tid;
char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ uint64_t flags = 0;
signal(SIGINT, sigint_handler);
@@ -1487,8 +1492,11 @@ main(int argc, char *argv[])
if (mergeable == 0)
rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
+ if (client_mode)
+ flags |= RTE_VHOST_USER_CLIENT;
+
/* Register vhost(cuse or user) driver to handle vhost messages. */
- ret = rte_vhost_driver_register(dev_basename, 0);
+ ret = rte_vhost_driver_register(dev_basename, flags);
if (ret != 0)
rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* [dpdk-dev] [PATCH v3 6/6] vhost: add pmd client option
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (4 preceding siblings ...)
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 5/6] examples/vhost: add client option Yuanhan Liu
@ 2016-06-07 4:05 ` Yuanhan Liu
2016-06-14 12:00 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-07 4:05 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau, Yuanhan Liu
Add a client option to the vhost PMD, to let it act as the vhost-user client.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v3: - remove the reconnect option, as it's enabled by default now.
- fix the non-initialized variable reported by Rich
---
drivers/net/vhost/rte_eth_vhost.c | 38 ++++++++++++++++++++++++++------------
1 file changed, 26 insertions(+), 12 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 91b5d95..96eda5f 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -50,12 +50,14 @@
#define ETH_VHOST_IFACE_ARG "iface"
#define ETH_VHOST_QUEUES_ARG "queues"
+#define ETH_VHOST_CLIENT_ARG "client"
static const char *drivername = "VHOST PMD";
static const char *valid_arguments[] = {
ETH_VHOST_IFACE_ARG,
ETH_VHOST_QUEUES_ARG,
+ ETH_VHOST_CLIENT_ARG,
NULL
};
@@ -89,6 +91,7 @@ struct pmd_internal {
char *dev_name;
char *iface_name;
uint16_t max_queues;
+ uint64_t flags;
volatile uint16_t once;
};
@@ -467,7 +470,8 @@ eth_dev_start(struct rte_eth_dev *dev)
int ret = 0;
if (rte_atomic16_cmpset(&internal->once, 0, 1)) {
- ret = rte_vhost_driver_register(internal->iface_name, 0);
+ ret = rte_vhost_driver_register(internal->iface_name,
+ internal->flags);
if (ret)
return ret;
}
@@ -672,7 +676,7 @@ static const struct eth_dev_ops ops = {
static int
eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
- const unsigned numa_node)
+ const unsigned numa_node, uint64_t flags)
{
struct rte_eth_dev_data *data = NULL;
struct pmd_internal *internal = NULL;
@@ -729,6 +733,7 @@ eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues,
internal->iface_name = strdup(iface_name);
if (internal->iface_name == NULL)
goto error;
+ internal->flags = flags;
list->eth_dev = eth_dev;
pthread_mutex_lock(&internal_list_lock);
@@ -793,18 +798,15 @@ open_iface(const char *key __rte_unused, const char *value, void *extra_args)
}
static inline int
-open_queues(const char *key __rte_unused, const char *value, void *extra_args)
+open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
- uint16_t *q = extra_args;
+ uint16_t *n = extra_args;
if (value == NULL || extra_args == NULL)
return -EINVAL;
- *q = (uint16_t)strtoul(value, NULL, 0);
- if (*q == USHRT_MAX && errno == ERANGE)
- return -1;
-
- if (*q > RTE_MAX_QUEUES_PER_PORT)
+ *n = (uint16_t)strtoul(value, NULL, 0);
+ if (*n == USHRT_MAX && errno == ERANGE)
return -1;
return 0;
@@ -817,6 +819,8 @@ rte_pmd_vhost_devinit(const char *name, const char *params)
int ret = 0;
char *iface_name;
uint16_t queues;
+ uint64_t flags = 0;
+ int client_mode = 0;
RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);
@@ -836,14 +840,24 @@ rte_pmd_vhost_devinit(const char *name, const char *params)
if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
- &open_queues, &queues);
- if (ret < 0)
+ &open_int, &queues);
+ if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
goto out_free;
} else
queues = 1;
- eth_dev_vhost_create(name, iface_name, queues, rte_socket_id());
+ if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
+ &open_int, &client_mode);
+ if (ret < 0)
+ goto out_free;
+
+ if (client_mode)
+ flags |= RTE_VHOST_USER_CLIENT;
+ }
+
+ eth_dev_vhost_create(name, iface_name, queues, rte_socket_id(), flags);
out_free:
rte_kvargs_free(kvlist);
--
1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base
2016-05-09 18:22 ` Yuanhan Liu
@ 2016-06-13 20:47 ` Michael S. Tsirkin
0 siblings, 0 replies; 50+ messages in thread
From: Michael S. Tsirkin @ 2016-06-13 20:47 UTC (permalink / raw)
To: Yuanhan Liu; +Cc: Xie, Huawei, dev, mlureau
On Mon, May 09, 2016 at 11:22:04AM -0700, Yuanhan Liu wrote:
> On Mon, May 09, 2016 at 04:25:38PM +0000, Xie, Huawei wrote:
> > On 5/7/2016 2:36 PM, Yuanhan Liu wrote:
> > > However, Michael raised a concern, and he made a good point: a crash
> > > happening means some memory is corrupted, and it could be the virtio
> > > memory being corrupted. In such a case, nothing will work without the
> > > reset.
> >
> > I don't get this point. What is the scenario?
>
> It's not a specific scenario, just a hypothetical one.
>
> > For the crash of virtio frontend driver, i remember we discussed before,
> > we have no good recipe but some workaround. The user space frontend
> > driver crashes, and its memory is reallocated to other instances, but
> > vhost is still writing to that memory. However this has nothing to do
> > with vhost reconnect.
>
> Hmm, yes, seems like another good point to me. This patch seems like
> a fix but not a workaround then :)
>
> --yliu
I think it's a good idea to make sure the ring is
already set up by this time, though.
Some future QEMU version might send the base idx before
it sets up the ring; we would not want to crash.
Hopefully it will have fixed the base value it sends by then.
--
MST
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
` (5 preceding siblings ...)
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 6/6] vhost: add pmd " Yuanhan Liu
@ 2016-06-14 12:00 ` Yuanhan Liu
6 siblings, 0 replies; 50+ messages in thread
From: Yuanhan Liu @ 2016-06-14 12:00 UTC (permalink / raw)
To: dev; +Cc: huawei.xie, Traynor Kevin, marcandre.lureau
Applied to dpdk-next-virtio.
--yliu
On Tue, Jun 07, 2016 at 12:05:02PM +0800, Yuanhan Liu wrote:
> v3: - make the "reconnect" feature the default for client mode, as it
> handles guest OS restarts with less effort.
> - fix the uninitialized-variable error pointed out by Rich
>
>
> NOTE: I created a branch at dpdk.org [0] for more convenient testing:
>
> [0]: git://dpdk.org/next/dpdk-next-virtio for-testing
>
>
> When the DPDK vhost-user application (such as OVS) restarts (due to a
> crash, or an update), the vhost-user connection between DPDK and QEMU
> won't be re-established automatically. In other words, the virtio
> net device is broken.
>
> The reason it doesn't work is that DPDK acts as the server only.
> A restart of the server requires a reconnection from the client (QEMU).
> However, QEMU does not support reconnecting.
>
> Adding support for client mode and letting DPDK be the client makes
> the above issue easier to resolve: a restarted DPDK would naturally
> try to connect to the server (QEMU) automatically.
>
> Therefore, this patchset implements the DPDK vhost-user client mode by
> introducing a new argument (flags) for the rte_vhost_driver_register()
> API; client mode is enabled when RTE_VHOST_USER_CLIENT is given. Note
> that this implies an API breakage. However, since this release deals
> with ABI/API refactoring, it should not be an issue.
>
> Another interesting thing to make it work is that you not only have
> to consider the case where the DPDK vhost-user app restarts, but also
> the case where QEMU restarts: the guest OS sometimes just reboots. In
> such a case, while the server is down, the client has to keep
> reconnecting until the server is back and the connection is
> established again. That's what the "reconnect" patch is for.
>
> Note that current QEMU does not support a second connection from the
> client, thus a restart of DPDK vhost-user will not work. This is
> because current QEMU cannot detect the disconnect caused by the
> restart, and therefore will not listen for later connections. Patches
> [1] have been sent; they are just not merged yet. But unlike the
> vhost-user multiple queue case, where we had a critical dependency on
> the QEMU implementation, here we have no such dependency. Therefore, I
> think it's okay to make DPDK ready for the "reconnect" stuff first.
> (Note that I also mentioned this fact in the release doc.)
>
> [1]: http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg01507.html
>
> v2: - added release doc
> - do not remove socket file for the client mode
> - create one thread only to handle all reconnects
>
>
> Thanks.
> --yliu
>
> ---
> Yuanhan Liu (6):
> vhost: rename structs for enabling client mode
> vhost: add vhost-user client mode
> vhost: add reconnect ability
> vhost: workaround stale vring base
> examples/vhost: add client option
> vhost: add pmd client option
>
> doc/guides/rel_notes/release_16_07.rst | 21 ++
> drivers/net/vhost/rte_eth_vhost.c | 38 ++-
> examples/vhost/main.c | 12 +-
> lib/librte_vhost/rte_virtio_net.h | 12 +-
> lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 8 +-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 403 ++++++++++++++++++---------
> lib/librte_vhost/vhost_user/vhost-net-user.h | 6 -
> lib/librte_vhost/virtio-net.c | 9 +
> 8 files changed, 361 insertions(+), 148 deletions(-)
>
> --
> 1.9.0
^ permalink raw reply [flat|nested] 50+ messages in thread
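As a usage illustration of the API change described above, registering a
vhost-user socket in client mode might look like this minimal sketch (the
socket path is only an example; error handling is reduced to the bare
minimum):

	#include <rte_virtio_net.h>

	static int
	register_vhost_client(const char *path)
	{
		/* RTE_VHOST_USER_CLIENT makes DPDK connect to a socket
		 * created by QEMU (the server) instead of creating and
		 * listening on the socket itself.  Passing 0 as flags
		 * keeps the original server-mode behaviour.  In client
		 * mode, reconnecting after the server goes away is the
		 * default as of v3 of this series. */
		if (rte_vhost_driver_register(path, RTE_VHOST_USER_CLIENT) < 0)
			return -1;

		return 0;
	}

	/* e.g. register_vhost_client("/tmp/vhost-user.sock"); */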
end of thread, other threads:[~2016-06-14 11:58 UTC | newest]
Thread overview: 50+ messages
2016-05-07 6:40 [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
2016-05-07 6:40 ` [dpdk-dev] [PATCH 2/6] vhost: add vhost-user " Yuanhan Liu
2016-05-09 10:33 ` Victor Kaplansky
2016-05-09 20:33 ` Yuanhan Liu
2016-05-09 20:30 ` Michael S. Tsirkin
2016-05-07 6:40 ` [dpdk-dev] [PATCH 3/6] vhost: add reconnect ability Yuanhan Liu
2016-05-09 16:47 ` Xie, Huawei
2016-05-09 18:12 ` Yuanhan Liu
2016-05-10 7:24 ` Xie, Huawei
2016-05-10 7:54 ` Michael S. Tsirkin
2016-05-10 8:07 ` Xie, Huawei
2016-05-10 8:42 ` Michael S. Tsirkin
2016-05-10 9:00 ` Xie, Huawei
2016-05-10 9:17 ` Michael S. Tsirkin
2016-05-10 17:17 ` Loftus, Ciara
2016-05-11 21:46 ` Michael S. Tsirkin
2016-05-07 6:40 ` [dpdk-dev] [PATCH 4/6] vhost: workaround stale vring base Yuanhan Liu
2016-05-09 10:45 ` Victor Kaplansky
2016-05-09 13:39 ` Xie, Huawei
2016-05-09 18:23 ` Yuanhan Liu
2016-05-09 12:19 ` Michael S. Tsirkin
2016-05-09 16:25 ` Xie, Huawei
2016-05-09 18:22 ` Yuanhan Liu
2016-06-13 20:47 ` Michael S. Tsirkin
2016-05-10 8:21 ` Xie, Huawei
2016-05-07 6:40 ` [dpdk-dev] [PATCH 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
2016-05-09 10:47 ` Victor Kaplansky
2016-05-07 6:40 ` [dpdk-dev] [PATCH 6/6] vhost: add pmd " Yuanhan Liu
2016-05-09 10:54 ` Victor Kaplansky
2016-05-09 18:26 ` Yuanhan Liu
2016-05-10 3:23 ` [dpdk-dev] [PATCH 0/6] vhost: add vhost-user client mode and reconnect ability Xu, Qian Q
2016-05-10 17:41 ` Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 " Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 2/6] vhost: add vhost-user " Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 3/6] vhost: add reconnect ability Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 4/6] vhost: workaround stale vring base Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 5/6] examples/vhost: add client and reconnect option Yuanhan Liu
2016-05-13 6:16 ` [dpdk-dev] [PATCH v2 6/6] vhost: add pmd " Yuanhan Liu
2016-05-25 17:45 ` Rich Lane
2016-05-26 8:01 ` Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 1/6] vhost: rename structs for enabling client mode Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 2/6] vhost: add vhost-user " Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 3/6] vhost: add reconnect ability Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 4/6] vhost: workaround stale vring base Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 5/6] examples/vhost: add client option Yuanhan Liu
2016-06-07 4:05 ` [dpdk-dev] [PATCH v3 6/6] vhost: add pmd " Yuanhan Liu
2016-06-14 12:00 ` [dpdk-dev] [PATCH v3 0/6] vhost: add vhost-user client mode and reconnect ability Yuanhan Liu