DPDK patches and discussions
 help / color / mirror / Atom feed
From: Huawei Xie <huawei.xie@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 11/11] lib/librte_vhost: support dynamically registering vhost server
Date: Thu, 12 Feb 2015 13:07:29 +0800	[thread overview]
Message-ID: <1423717649-11818-12-git-send-email-huawei.xie@intel.com> (raw)
In-Reply-To: <1423717649-11818-1-git-send-email-huawei.xie@intel.com>

* support calling rte_vhost_driver_register after rte_vhost_driver_session_start
* add mutext to protect fdset from concurrent access
* add busy flag in fdentry. this flag is set before cb and cleared after cb is finished.

mutex lock scenario in vhost:

* event_dispatch(in rte_vhost_driver_session_start) runs in a seperate thread, infinitely
processing vhost messages through cb(callback).
* event_dispatch acquires the lock, get the cb and its context, mark the busy flag,
and releases the mutex.
* vserver_new_vq_conn cb calls fdset_add, which acquires the mutex and add new fd into fdset.
* vserver_message_handler cb frees data context, marks remove flag to request to delete
connfd(connection fd) from fdset.
* after cb returns, event_dispatch
  1. clears busy flag.
  2. if there is remove request, call fdset_del, which acquires mutex, checks busy flag, and
removes connfd from fdset.
* rte_vhost_driver_unregister(not implemented) runs in another thread, acquires the mutex,
calls fdset_del to remove fd(listenerfd) from fdset. Then it could free data context.

The above steps ensures fd data context isn't freed when cb is using.

VM(s) should have been shutdown before rte_vhost_driver_unregister.

Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 lib/librte_vhost/vhost_user/fd_man.c         | 63 +++++++++++++++++++++++++---
 lib/librte_vhost/vhost_user/fd_man.h         |  5 ++-
 lib/librte_vhost/vhost_user/vhost-net-user.c | 34 +++++++++------
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
index 929fbc3..63ac4df 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -40,6 +40,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include <rte_common.h>
 #include <rte_log.h>
 
 #include "fd_man.h"
@@ -145,6 +146,8 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
 	if (pfdset == NULL || fd == -1)
 		return -1;
 
+	pthread_mutex_lock(&pfdset->fd_mutex);
+
 	/* Find a free slot in the list. */
 	i = fdset_find_free_slot(pfdset);
 	if (i == -1)
@@ -153,6 +156,8 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
 	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
 	pfdset->num++;
 
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+
 	return 0;
 }
 
@@ -164,17 +169,36 @@ fdset_del(struct fdset *pfdset, int fd)
 {
 	int i;
 
+	if (pfdset == NULL || fd == -1)
+		return;
+
+again:
+	pthread_mutex_lock(&pfdset->fd_mutex);
+
 	i = fdset_find_fd(pfdset, fd);
 	if (i != -1 && fd != -1) {
+		/* busy indicates r/wcb is executing! */
+		if (pfdset->fd[i].busy == 1) {
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			goto again;
+		}
+
 		pfdset->fd[i].fd = -1;
 		pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
 		pfdset->num--;
 	}
+
+	pthread_mutex_unlock(&pfdset->fd_mutex);
 }
 
 /**
  * This functions runs in infinite blocking loop until there is no fd in
  * pfdset. It calls corresponding r/w handler if there is event on the fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
  */
 void
 fdset_event_dispatch(struct fdset *pfdset)
@@ -183,6 +207,10 @@ fdset_event_dispatch(struct fdset *pfdset)
 	int i, maxfds;
 	struct fdentry *pfdentry;
 	int num = MAX_FDS;
+	fd_cb rcb, wcb;
+	void *dat;
+	int fd;
+	int remove1, remove2;
 
 	if (pfdset == NULL)
 		return;
@@ -190,18 +218,41 @@ fdset_event_dispatch(struct fdset *pfdset)
 	while (1) {
 		FD_ZERO(&rfds);
 		FD_ZERO(&wfds);
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
 		maxfds = fdset_fill(&rfds, &wfds, pfdset);
-		if (maxfds == -1)
-			return;
+		if (maxfds == -1) {
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			sleep(1);
+			continue;
+		}
+
+		pthread_mutex_unlock(&pfdset->fd_mutex);
 
 		select(maxfds + 1, &rfds, &wfds, NULL, NULL);
 
 		for (i = 0; i < num; i++) {
+			remove1 = remove2 = 0;
+			pthread_mutex_lock(&pfdset->fd_mutex);
 			pfdentry = &pfdset->fd[i];
-			if (pfdentry->fd >= 0 && FD_ISSET(pfdentry->fd, &rfds) && pfdentry->rcb)
-				pfdentry->rcb(pfdentry->fd, pfdentry->dat);
-			if (pfdentry->fd >= 0 && FD_ISSET(pfdentry->fd, &wfds) && pfdentry->wcb)
-				pfdentry->wcb(pfdentry->fd, pfdentry->dat);
+			fd = pfdentry->fd;
+			rcb = pfdentry->rcb;
+			wcb = pfdentry->wcb;
+			dat = pfdentry->dat;
+			pfdentry->busy = 1;
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb)
+				rcb(fd, dat, &remove1);
+			if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb)
+				wcb(fd, dat, &remove2);
+			pfdentry->busy = 0;
+			/*
+			 * fdset_del needs to check busy flag.
+			 * We don't allow fdset_del to be called in callback
+			 * directly.
+			 */
+			if (remove1 || remove2)
+				fdset_del(pfdset, fd);
 		}
 	}
 }
diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h
index 26b4619..74ecde2 100644
--- a/lib/librte_vhost/vhost_user/fd_man.h
+++ b/lib/librte_vhost/vhost_user/fd_man.h
@@ -34,20 +34,23 @@
 #ifndef _FD_MAN_H_
 #define _FD_MAN_H_
 #include <stdint.h>
+#include <pthread.h>
 
 #define MAX_FDS 1024
 
-typedef void (*fd_cb)(int fd, void *dat);
+typedef void (*fd_cb)(int fd, void *dat, int *remove);
 
 struct fdentry {
 	int fd;		/* -1 indicates this entry is empty */
 	fd_cb rcb;	/* callback when this fd is readable. */
 	fd_cb wcb;	/* callback when this fd is writeable.*/
 	void *dat;	/* fd context */
+	int busy;	/* whether this entry is being used in cb. */
 };
 
 struct fdset {
 	struct fdentry fd[MAX_FDS];
+	pthread_mutex_t fd_mutex;
 	int num;	/* current fd number of this fdset */
 };
 
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 634a498..3aa9436 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -41,6 +41,7 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <errno.h>
+#include <pthread.h>
 
 #include <rte_log.h>
 #include <rte_virtio_net.h>
@@ -51,8 +52,9 @@
 #include "virtio-net-user.h"
 
 #define MAX_VIRTIO_BACKLOG 128
-static void vserver_new_vq_conn(int fd, void *data);
-static void vserver_message_handler(int fd, void *dat);
+
+static void vserver_new_vq_conn(int fd, void *data, int *remove);
+static void vserver_message_handler(int fd, void *dat, int *remove);
 struct vhost_net_device_ops const *ops;
 
 struct connfd_ctx {
@@ -61,10 +63,18 @@ struct connfd_ctx {
 };
 
 #define MAX_VHOST_SERVER 1024
-static struct {
+struct _vhost_server {
 	struct vhost_server *server[MAX_VHOST_SERVER];
-	struct fdset fdset;	/**< The fd list this vhost server manages. */
-} g_vhost_server;
+	struct fdset fdset;
+};
+
+static struct _vhost_server g_vhost_server = {
+	.fdset = {
+		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
+		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.num = 0
+	},
+};
 
 static int vserver_idx;
 
@@ -261,7 +271,7 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
 
 /* call back when there is new virtio connection.  */
 static void
-vserver_new_vq_conn(int fd, void *dat)
+vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
 {
 	struct vhost_server *vserver = (struct vhost_server *)dat;
 	int conn_fd;
@@ -304,7 +314,7 @@ vserver_new_vq_conn(int fd, void *dat)
 
 /* callback when there is message on the connfd */
 static void
-vserver_message_handler(int connfd, void *dat)
+vserver_message_handler(int connfd, void *dat, int *remove)
 {
 	struct vhost_device_ctx ctx;
 	struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
@@ -319,7 +329,7 @@ vserver_message_handler(int connfd, void *dat)
 			"vhost read message failed\n");
 
 		close(connfd);
-		fdset_del(&g_vhost_server.fdset, connfd);
+		*remove = 1;
 		free(cfd_ctx);
 		user_destroy_device(ctx);
 		ops->destroy_device(ctx);
@@ -330,7 +340,7 @@ vserver_message_handler(int connfd, void *dat)
 			"vhost peer closed\n");
 
 		close(connfd);
-		fdset_del(&g_vhost_server.fdset, connfd);
+		*remove = 1;
 		free(cfd_ctx);
 		user_destroy_device(ctx);
 		ops->destroy_device(ctx);
@@ -342,7 +352,7 @@ vserver_message_handler(int connfd, void *dat)
 			"vhost read incorrect message\n");
 
 		close(connfd);
-		fdset_del(&g_vhost_server.fdset, connfd);
+		*remove = 1;
 		free(cfd_ctx);
 		user_destroy_device(ctx);
 		ops->destroy_device(ctx);
@@ -426,10 +436,8 @@ rte_vhost_driver_register(const char *path)
 {
 	struct vhost_server *vserver;
 
-	if (vserver_idx == 0) {
-		fdset_init(&g_vhost_server.fdset);
+	if (vserver_idx == 0)
 		ops = get_virtio_net_callbacks();
-	}
 	if (vserver_idx == MAX_VHOST_SERVER)
 		return -1;
 
-- 
1.8.1.4

  parent reply	other threads:[~2015-02-12  5:08 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-12  5:07 [dpdk-dev] [PATCH v2 00/11] qemu vhost-user support Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 01/11] lib/librte_vhost: enable VIRTIO_NET_F_CTRL_RX VIRTIO_NET_F_CTRL_RX is dependant on VIRTIO_NET_F_CTRL_VQ. Observed that virtio-net driver in guest would crash with only CTRL_RX enabled Huawei Xie
2015-02-16  8:15   ` Tetsuya Mukawa
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 02/11] lib/librte_vhost: create vhost_cuse directory and move vhost-net-cdev.c into vhost_cuse Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 03/11] lib/librte_vhost: rename vhost-net-cdev.h to vhost-net.h Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 04/11] lib/librte_vhost: move fd copying(from qemu process into vhost process) to eventfd_copy.c Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 05/11] lib/librte_vhost: copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 06/11] lib/librte_vhost: make host_memory_map a more generic function Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 07/11] lib/librte_vhost: implement cuse_set_memory_table Huawei Xie
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 08/11] lib/librte_vhost: add select based event driven processing Huawei Xie
2015-02-16 17:10   ` Ananyev, Konstantin
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 09/11] lib/librte_vhost: vhost user support Huawei Xie
2015-02-12  8:26   ` Linhaifeng
2015-02-12  9:28     ` Xie, Huawei
2015-02-12 10:19       ` Linhaifeng
2015-02-12  5:07 ` [dpdk-dev] [PATCH v2 10/11] lib/librte_vhost: support dev->ifname for vhost-user Huawei Xie
2015-02-12  5:07 ` Huawei Xie [this message]
2015-02-16  8:17   ` [dpdk-dev] [PATCH v2 11/11] lib/librte_vhost: support dynamically registering vhost server Tetsuya Mukawa
2015-02-16 17:11   ` Ananyev, Konstantin
2015-02-12  5:26 ` [dpdk-dev] [PATCH v2 00/11] qemu vhost-user support Xie, Huawei
2015-02-16  8:19 ` Tetsuya Mukawa
2015-02-22 18:20   ` Thomas Monjalon
2015-02-23 13:53     ` Czesnowicz, Przemyslaw
2015-02-23 14:08       ` Thomas Monjalon
2015-02-23 14:15         ` Czesnowicz, Przemyslaw
2015-02-23  2:50 ` Tetsuya Mukawa
2015-02-23 17:36 ` [dpdk-dev] [PATCH v3 " Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 01/11] lib/librte_vhost: enable VIRTIO_NET_F_CTRL_RX VIRTIO_NET_F_CTRL_RX is dependant on VIRTIO_NET_F_CTRL_VQ. Observed that virtio-net driver in guest would crash with only CTRL_RX enabled Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 02/11] lib/librte_vhost: create vhost_cuse directory and move vhost-net-cdev.c into vhost_cuse Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 03/11] lib/librte_vhost: rename vhost-net-cdev.h to vhost-net.h Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 04/11] lib/librte_vhost: move fd copying(from qemu process into vhost process) to eventfd_copy.c Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 05/11] lib/librte_vhost: copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 06/11] lib/librte_vhost: make host_memory_map a more generic function Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 07/11] lib/librte_vhost: implement cuse_set_memory_table Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 08/11] lib/librte_vhost: add select based event driven processing Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 09/11] lib/librte_vhost: vhost user support Przemyslaw Czesnowicz
2015-02-27  9:42     ` Xie, Huawei
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 10/11] lib/librte_vhost: support dev->ifname for vhost-user Przemyslaw Czesnowicz
2015-02-23 17:36   ` [dpdk-dev] [PATCH v3 11/11] lib/librte_vhost: support dynamically registering vhost server Przemyslaw Czesnowicz
2015-02-24  0:41   ` [dpdk-dev] [PATCH v3 00/11] qemu vhost-user support Thomas Monjalon
2015-02-25  5:55   ` Xie, Huawei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1423717649-11818-12-git-send-email-huawei.xie@intel.com \
    --to=huawei.xie@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).