DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
@ 2016-07-21  8:21 Ilya Maximets
  2016-07-21  9:37 ` Yuanhan Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21  8:21 UTC (permalink / raw)
  To: dev, Huawei Xie, Yuanhan Liu
  Cc: Dyasly Sergey, Heetae Ahn, Thomas Monjalon, Ilya Maximets

If something abnormal happened to QEMU, 'connect()' can block calling
thread (e.g. main thread of OVS) forever or for a really long time.
This can break whole application or block the reconnection thread.

Example with OVS:

	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
	(gdb) bt
	#0  connect () from /lib64/libpthread.so.0
	#1  vhost_user_create_client (vsocket=0xa816e0)
	#2  rte_vhost_driver_register
	#3  netdev_dpdk_vhost_user_construct
	#4  netdev_open (name=0xa664b0 "vhost1")
	[...]
	#11 main

Fix that by setting non-blocking mode for client sockets for connection.

Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---

This was reproduced with current QEMU master branch
(commit 1ecfb24da987b862f) + patch-set "vhost-user reconnect fixes"
(https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01547.html).

OVS was patched to support client mode:
http://openvswitch.org/pipermail/dev/2016-July/074972.html

Following script forces QEMU to fail to initialize vhost because
disconnection occures while device not fully configured:

	while true
	do
		ovs-vsctl set Interface vhost1 ofport_request=125
		ovs-vsctl set Interface vhost1 ofport_request=126
	done

As a result: QEMU still works, network interface broken and OVS main
             thread stalled inside 'connect()'.

 lib/librte_vhost/vhost_user/vhost-net-user.c | 77 ++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f0f92f8..08a5f6b 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -43,6 +43,7 @@
 #include <sys/un.h>
 #include <sys/queue.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <pthread.h>
 
 #include <rte_log.h>
@@ -449,6 +450,14 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
 	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
 		is_server ? "server" : "client", fd);
 
+	if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost-user: can't set nonblocking mode for socket, fd: "
+			"%d (%s)\n", fd, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
 	memset(un, 0, sizeof(*un));
 	un->sun_family = AF_UNIX;
 	strncpy(un->sun_path, path, sizeof(un->sun_path));
@@ -516,9 +525,58 @@ struct vhost_user_reconnect_list {
 static struct vhost_user_reconnect_list reconn_list;
 static pthread_t reconn_tid;
 
+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+	int ret, flags, so_error;
+	socklen_t len = sizeof(so_error);
+	fd_set fdset;
+	/* 5 second timeout */
+	struct timeval tv = {.tv_sec = 5, .tv_usec = 0};
+
+	errno = EINVAL;
+
+	ret = connect(fd, un, sz);
+	if (ret == -1 && errno != EINPROGRESS)
+		return -1;
+	if (ret == 0)
+		goto connected;
+
+	FD_ZERO(&fdset);
+	FD_SET(fd, &fdset);
+
+	ret = select(fd + 1, NULL, &fdset, NULL, &tv);
+	if (!ret)
+		errno = ETIMEDOUT;
+	if (ret != 1)
+		return -1;
+
+	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
+	if (ret < 0 || so_error) {
+		if (!ret)
+			errno = so_error;
+		return -1;
+	}
+
+connected:
+	flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"can't get flags for connfd %d\n", fd);
+		return -2;
+	}
+	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"can't disable nonblocking on fd %d\n", fd);
+		return -2;
+	}
+	return 0;
+}
+
 static void *
 vhost_user_client_reconnect(void *arg __rte_unused)
 {
+	int ret;
 	struct vhost_user_reconnect *reconn, *next;
 
 	while (1) {
@@ -532,13 +590,23 @@ vhost_user_client_reconnect(void *arg __rte_unused)
 		     reconn != NULL; reconn = next) {
 			next = TAILQ_NEXT(reconn, next);
 
-			if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
-				    sizeof(reconn->un)) < 0)
+			ret = vhost_user_connect_nonblock(reconn->fd,
+						(struct sockaddr *)&reconn->un,
+						sizeof(reconn->un));
+			if (ret == -2) {
+				close(reconn->fd);
+				RTE_LOG(ERR, VHOST_CONFIG,
+					"reconnection for fd %d failed\n",
+					reconn->fd);
+				goto remove_fd;
+			}
+			if (ret == -1)
 				continue;
 
 			RTE_LOG(INFO, VHOST_CONFIG,
 				"%s: connected\n", reconn->vsocket->path);
 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
+remove_fd:
 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
 			free(reconn);
 		}
@@ -579,7 +647,8 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
 	if (fd < 0)
 		return -1;
 
-	ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+					  sizeof(un));
 	if (ret == 0) {
 		vhost_user_add_connection(fd, vsocket);
 		return 0;
@@ -589,7 +658,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
 		"failed to connect to %s: %s\n",
 		path, strerror(errno));
 
-	if (!vsocket->reconnect) {
+	if (ret == -2 || !vsocket->reconnect) {
 		close(fd);
 		return -1;
 	}
-- 
2.7.4

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21  8:21 [dpdk-dev] [PATCH] vhost: fix connect hang in client mode Ilya Maximets
@ 2016-07-21  9:37 ` Yuanhan Liu
  2016-07-21  9:45   ` Ilya Maximets
  2016-07-21 11:12 ` [dpdk-dev] [PATCH v2] " Ilya Maximets
  2016-07-21 13:19 ` [dpdk-dev] [PATCH v3] " Ilya Maximets
  2 siblings, 1 reply; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21  9:37 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 11:21:15AM +0300, Ilya Maximets wrote:
> If something abnormal happened to QEMU, 'connect()' can block calling
> thread (e.g. main thread of OVS) forever or for a really long time.
> This can break whole application or block the reconnection thread.
> 
> Example with OVS:
> 
> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
> 	(gdb) bt
> 	#0  connect () from /lib64/libpthread.so.0
> 	#1  vhost_user_create_client (vsocket=0xa816e0)
> 	#2  rte_vhost_driver_register
> 	#3  netdev_dpdk_vhost_user_construct
> 	#4  netdev_open (name=0xa664b0 "vhost1")
> 	[...]
> 	#11 main
> 
> Fix that by setting non-blocking mode for client sockets for connection.
> 
> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")

Thanks for spotting and fixing yet another bug!

>  
> +static int
> +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)

I don't quite understand why this is needed: connect() with O_NONBLOCK
flag set is not enough?

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21  9:37 ` Yuanhan Liu
@ 2016-07-21  9:45   ` Ilya Maximets
  2016-07-21 10:13     ` Yuanhan Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21  9:45 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On 21.07.2016 12:37, Yuanhan Liu wrote:
> On Thu, Jul 21, 2016 at 11:21:15AM +0300, Ilya Maximets wrote:
>> If something abnormal happened to QEMU, 'connect()' can block calling
>> thread (e.g. main thread of OVS) forever or for a really long time.
>> This can break whole application or block the reconnection thread.
>>
>> Example with OVS:
>>
>> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
>> 	(gdb) bt
>> 	#0  connect () from /lib64/libpthread.so.0
>> 	#1  vhost_user_create_client (vsocket=0xa816e0)
>> 	#2  rte_vhost_driver_register
>> 	#3  netdev_dpdk_vhost_user_construct
>> 	#4  netdev_open (name=0xa664b0 "vhost1")
>> 	[...]
>> 	#11 main
>>
>> Fix that by setting non-blocking mode for client sockets for connection.
>>
>> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
> 
> Thanks for spotting and fixing yet another bug!
> 
>>  
>> +static int
>> +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
> 
> I don't quite understand why this is needed: connect() with O_NONBLOCK
> flag set is not enough?

There is a little issue with non-blocking connect() call. Connection
establishing may be started but '-1' returned with 'errno = EINPROGRESS'.
In this case we must wait on fd until it will be available for writing.
After that we need to check current status of connection using getsockopt().

I don't sure that we're able to get such situation, but it's documented,
and, I think, we should handle it.

See 'man connect' for details.

Best regards, Ilya Maximets.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21  9:45   ` Ilya Maximets
@ 2016-07-21 10:13     ` Yuanhan Liu
  2016-07-21 10:37       ` Ilya Maximets
  0 siblings, 1 reply; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 10:13 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 12:45:32PM +0300, Ilya Maximets wrote:
> On 21.07.2016 12:37, Yuanhan Liu wrote:
> > On Thu, Jul 21, 2016 at 11:21:15AM +0300, Ilya Maximets wrote:
> >> If something abnormal happened to QEMU, 'connect()' can block calling
> >> thread (e.g. main thread of OVS) forever or for a really long time.
> >> This can break whole application or block the reconnection thread.
> >>
> >> Example with OVS:
> >>
> >> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
> >> 	(gdb) bt
> >> 	#0  connect () from /lib64/libpthread.so.0
> >> 	#1  vhost_user_create_client (vsocket=0xa816e0)
> >> 	#2  rte_vhost_driver_register
> >> 	#3  netdev_dpdk_vhost_user_construct
> >> 	#4  netdev_open (name=0xa664b0 "vhost1")
> >> 	[...]
> >> 	#11 main
> >>
> >> Fix that by setting non-blocking mode for client sockets for connection.
> >>
> >> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
> > 
> > Thanks for spotting and fixing yet another bug!
> > 
> >>  
> >> +static int
> >> +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
> > 
> > I don't quite understand why this is needed: connect() with O_NONBLOCK
> > flag set is not enough?
> 
> There is a little issue with non-blocking connect() call. Connection
> establishing may be started but '-1' returned with 'errno = EINPROGRESS'.
> In this case we must wait on fd until it will be available for writing.
> After that we need to check current status of connection using getsockopt().
> 
> I don't sure that we're able to get such situation, but it's documented,
> and, I think, we should handle it.
> 
> See 'man connect' for details.

I see. Thanks.

But basically, I don't like the way of introduing yet another
fdset here. I'm wondering we could leverage current fdset code
to achieve that. This might need some work though.

So how about making it simple and stupid at this stage: sleep a
while (maybe 1ms, or maybe 1s) when that happens, and give up
when the connection is still not established?

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 10:13     ` Yuanhan Liu
@ 2016-07-21 10:37       ` Ilya Maximets
  2016-07-21 11:14         ` Ilya Maximets
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 10:37 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon



On 21.07.2016 13:13, Yuanhan Liu wrote:
> On Thu, Jul 21, 2016 at 12:45:32PM +0300, Ilya Maximets wrote:
>> On 21.07.2016 12:37, Yuanhan Liu wrote:
>>> On Thu, Jul 21, 2016 at 11:21:15AM +0300, Ilya Maximets wrote:
>>>> If something abnormal happened to QEMU, 'connect()' can block calling
>>>> thread (e.g. main thread of OVS) forever or for a really long time.
>>>> This can break whole application or block the reconnection thread.
>>>>
>>>> Example with OVS:
>>>>
>>>> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
>>>> 	(gdb) bt
>>>> 	#0  connect () from /lib64/libpthread.so.0
>>>> 	#1  vhost_user_create_client (vsocket=0xa816e0)
>>>> 	#2  rte_vhost_driver_register
>>>> 	#3  netdev_dpdk_vhost_user_construct
>>>> 	#4  netdev_open (name=0xa664b0 "vhost1")
>>>> 	[...]
>>>> 	#11 main
>>>>
>>>> Fix that by setting non-blocking mode for client sockets for connection.
>>>>
>>>> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
>>>
>>> Thanks for spotting and fixing yet another bug!
>>>
>>>>  
>>>> +static int
>>>> +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>>>
>>> I don't quite understand why this is needed: connect() with O_NONBLOCK
>>> flag set is not enough?
>>
>> There is a little issue with non-blocking connect() call. Connection
>> establishing may be started but '-1' returned with 'errno = EINPROGRESS'.
>> In this case we must wait on fd until it will be available for writing.
>> After that we need to check current status of connection using getsockopt().
>>
>> I don't sure that we're able to get such situation, but it's documented,
>> and, I think, we should handle it.
>>
>> See 'man connect' for details.
> 
> I see. Thanks.
> 
> But basically, I don't like the way of introduing yet another
> fdset here. I'm wondering we could leverage current fdset code
> to achieve that. This might need some work though.
> 
> So how about making it simple and stupid at this stage: sleep a
> while (maybe 1ms, or maybe 1s) when that happens, and give up
> when the connection is still not established?

Hmm, how about this fixup:
------------------------------------------------------------------------------
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 8626d13..b0f45e6 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -537,18 +537,7 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
 	errno = EINVAL;
 
 	ret = connect(fd, un, sz);
-	if (ret == -1 && errno != EINPROGRESS)
-		return -1;
-	if (ret == 0)
-		goto connected;
-
-	FD_ZERO(&fdset);
-	FD_SET(fd, &fdset);
-
-	ret = select(fd + 1, NULL, &fdset, NULL, &tv);
-	if (!ret)
-		errno = ETIMEDOUT;
-	if (ret != 1)
+	if (ret < 0 && errno != EISCONN)
 		return -1;
 
 	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
@@ -558,7 +547,6 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
 		return -1;
 	}
 
-connected:
 	flags = fcntl(fd, F_GETFL, 0);
 	if (flags < 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
------------------------------------------------------------------------------
?

We will not check the EINPROGRESS, but subsequent 'connect()' will return
EISCONN if connection already established. getsockopt() is kept just in
case. Subsequent 'connect()' will happen on the next iteration of
reconnection cycle (1 second sleep).

Best regards, Ilya Maximets.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [dpdk-dev] [PATCH v2] vhost: fix connect hang in client mode
  2016-07-21  8:21 [dpdk-dev] [PATCH] vhost: fix connect hang in client mode Ilya Maximets
  2016-07-21  9:37 ` Yuanhan Liu
@ 2016-07-21 11:12 ` Ilya Maximets
  2016-07-21 13:19 ` [dpdk-dev] [PATCH v3] " Ilya Maximets
  2 siblings, 0 replies; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 11:12 UTC (permalink / raw)
  To: dev, Huawei Xie, Yuanhan Liu
  Cc: Dyasly Sergey, Heetae Ahn, Thomas Monjalon, Ilya Maximets

If something abnormal happened to QEMU, 'connect()' can block calling
thread (e.g. main thread of OVS) forever or for a really long time.
This can break whole application or block the reconnection thread.

Example with OVS:

	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
	(gdb) bt
	#0  connect () from /lib64/libpthread.so.0
	#1  vhost_user_create_client (vsocket=0xa816e0)
	#2  rte_vhost_driver_register
	#3  netdev_dpdk_vhost_user_construct
	#4  netdev_open (name=0xa664b0 "vhost1")
	[...]
	#11 main

Fix that by setting non-blocking mode for client sockets for connection.

Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
This was reproduced with current QEMU master branch
(commit 1ecfb24da987b862f) + patch-set "vhost-user reconnect fixes"
(https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01547.html).

OVS was patched to support client mode:
http://openvswitch.org/pipermail/dev/2016-July/074972.html

Following script forces QEMU to fail to initialize vhost because
disconnection occures while device not fully configured:

	while true
	do
		ovs-vsctl set Interface vhost1 ofport_request=125
		ovs-vsctl set Interface vhost1 ofport_request=126
	done

As a result: QEMU still works, network interface broken and OVS main
             thread stalled inside 'connect()'.

Version 2:
	* EINPROGRESS not checked. EISCONN checked instead on
	  the next iteration of reconnection loop.

 lib/librte_vhost/vhost_user/vhost-net-user.c | 62 ++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 8c6a096..63e0840 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -43,6 +43,7 @@
 #include <sys/un.h>
 #include <sys/queue.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <pthread.h>
 
 #include <rte_log.h>
@@ -449,6 +450,14 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
 	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
 		is_server ? "server" : "client", fd);
 
+	if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost-user: can't set nonblocking mode for socket, fd: "
+			"%d (%s)\n", fd, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
 	memset(un, 0, sizeof(*un));
 	un->sun_family = AF_UNIX;
 	strncpy(un->sun_path, path, sizeof(un->sun_path));
@@ -516,9 +525,43 @@ struct vhost_user_reconnect_list {
 static struct vhost_user_reconnect_list reconn_list;
 static pthread_t reconn_tid;
 
+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+	int ret, flags, so_error;
+	socklen_t len = sizeof(so_error);
+
+	errno = EINVAL;
+
+	ret = connect(fd, un, sz);
+	if (ret < 0 && errno != EISCONN)
+		return -1;
+
+	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
+	if (ret < 0 || so_error) {
+		if (!ret)
+			errno = so_error;
+		return -1;
+	}
+
+	flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"can't get flags for connfd %d\n", fd);
+		return -2;
+	}
+	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"can't disable nonblocking on fd %d\n", fd);
+		return -2;
+	}
+	return 0;
+}
+
 static void *
 vhost_user_client_reconnect(void *arg __rte_unused)
 {
+	int ret;
 	struct vhost_user_reconnect *reconn, *next;
 
 	while (1) {
@@ -532,13 +575,23 @@ vhost_user_client_reconnect(void *arg __rte_unused)
 		     reconn != NULL; reconn = next) {
 			next = TAILQ_NEXT(reconn, next);
 
-			if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
-				    sizeof(reconn->un)) < 0)
+			ret = vhost_user_connect_nonblock(reconn->fd,
+						(struct sockaddr *)&reconn->un,
+						sizeof(reconn->un));
+			if (ret == -2) {
+				close(reconn->fd);
+				RTE_LOG(ERR, VHOST_CONFIG,
+					"reconnection for fd %d failed\n",
+					reconn->fd);
+				goto remove_fd;
+			}
+			if (ret == -1)
 				continue;
 
 			RTE_LOG(INFO, VHOST_CONFIG,
 				"%s: connected\n", reconn->vsocket->path);
 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
+remove_fd:
 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
 			free(reconn);
 		}
@@ -579,7 +632,8 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
 	if (fd < 0)
 		return -1;
 
-	ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+					  sizeof(un));
 	if (ret == 0) {
 		vhost_user_add_connection(fd, vsocket);
 		return 0;
@@ -589,7 +643,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
 		"failed to connect to %s: %s\n",
 		path, strerror(errno));
 
-	if (!vsocket->reconnect) {
+	if (ret == -2 || !vsocket->reconnect) {
 		close(fd);
 		return -1;
 	}
-- 
2.7.4

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 10:37       ` Ilya Maximets
@ 2016-07-21 11:14         ` Ilya Maximets
  2016-07-21 11:40           ` Yuanhan Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 11:14 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On 21.07.2016 13:37, Ilya Maximets wrote:
> 
> 
> On 21.07.2016 13:13, Yuanhan Liu wrote:
>> On Thu, Jul 21, 2016 at 12:45:32PM +0300, Ilya Maximets wrote:
>>> On 21.07.2016 12:37, Yuanhan Liu wrote:
>>>> On Thu, Jul 21, 2016 at 11:21:15AM +0300, Ilya Maximets wrote:
>>>>> If something abnormal happened to QEMU, 'connect()' can block calling
>>>>> thread (e.g. main thread of OVS) forever or for a really long time.
>>>>> This can break whole application or block the reconnection thread.
>>>>>
>>>>> Example with OVS:
>>>>>
>>>>> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
>>>>> 	(gdb) bt
>>>>> 	#0  connect () from /lib64/libpthread.so.0
>>>>> 	#1  vhost_user_create_client (vsocket=0xa816e0)
>>>>> 	#2  rte_vhost_driver_register
>>>>> 	#3  netdev_dpdk_vhost_user_construct
>>>>> 	#4  netdev_open (name=0xa664b0 "vhost1")
>>>>> 	[...]
>>>>> 	#11 main
>>>>>
>>>>> Fix that by setting non-blocking mode for client sockets for connection.
>>>>>
>>>>> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
>>>>
>>>> Thanks for spotting and fixing yet another bug!
>>>>
>>>>>  
>>>>> +static int
>>>>> +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>>>>
>>>> I don't quite understand why this is needed: connect() with O_NONBLOCK
>>>> flag set is not enough?
>>>
>>> There is a little issue with non-blocking connect() call. Connection
>>> establishing may be started but '-1' returned with 'errno = EINPROGRESS'.
>>> In this case we must wait on fd until it will be available for writing.
>>> After that we need to check current status of connection using getsockopt().
>>>
>>> I don't sure that we're able to get such situation, but it's documented,
>>> and, I think, we should handle it.
>>>
>>> See 'man connect' for details.
>>
>> I see. Thanks.
>>
>> But basically, I don't like the way of introduing yet another
>> fdset here. I'm wondering we could leverage current fdset code
>> to achieve that. This might need some work though.
>>
>> So how about making it simple and stupid at this stage: sleep a
>> while (maybe 1ms, or maybe 1s) when that happens, and give up
>> when the connection is still not established?
> 
> Hmm, how about this fixup:
> ------------------------------------------------------------------------------
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> index 8626d13..b0f45e6 100644
> --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -537,18 +537,7 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>  	errno = EINVAL;
>  
>  	ret = connect(fd, un, sz);
> -	if (ret == -1 && errno != EINPROGRESS)
> -		return -1;
> -	if (ret == 0)
> -		goto connected;
> -
> -	FD_ZERO(&fdset);
> -	FD_SET(fd, &fdset);
> -
> -	ret = select(fd + 1, NULL, &fdset, NULL, &tv);
> -	if (!ret)
> -		errno = ETIMEDOUT;
> -	if (ret != 1)
> +	if (ret < 0 && errno != EISCONN)
>  		return -1;
>  
>  	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
> @@ -558,7 +547,6 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>  		return -1;
>  	}
>  
> -connected:
>  	flags = fcntl(fd, F_GETFL, 0);
>  	if (flags < 0) {
>  		RTE_LOG(ERR, VHOST_CONFIG,
> ------------------------------------------------------------------------------
> ?
> 
> We will not check the EINPROGRESS, but subsequent 'connect()' will return
> EISCONN if connection already established. getsockopt() is kept just in
> case. Subsequent 'connect()' will happen on the next iteration of
> reconnection cycle (1 second sleep).

I've sent v2 with this changes.

Best regards, Ilya Maximets.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 11:14         ` Ilya Maximets
@ 2016-07-21 11:40           ` Yuanhan Liu
  2016-07-21 12:10             ` Ilya Maximets
  0 siblings, 1 reply; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 11:40 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 02:14:59PM +0300, Ilya Maximets wrote:
> > Hmm, how about this fixup:
> > ------------------------------------------------------------------------------
> > diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > index 8626d13..b0f45e6 100644
> > --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> > @@ -537,18 +537,7 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
> >  	errno = EINVAL;
> >  
> >  	ret = connect(fd, un, sz);
> > -	if (ret == -1 && errno != EINPROGRESS)
> > -		return -1;
> > -	if (ret == 0)
> > -		goto connected;
> > -
> > -	FD_ZERO(&fdset);
> > -	FD_SET(fd, &fdset);
> > -
> > -	ret = select(fd + 1, NULL, &fdset, NULL, &tv);
> > -	if (!ret)
> > -		errno = ETIMEDOUT;
> > -	if (ret != 1)
> > +	if (ret < 0 && errno != EISCONN)
> >  		return -1;
> >  
> >  	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
> > @@ -558,7 +547,6 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
> >  		return -1;
> >  	}
> >  
> > -connected:
> >  	flags = fcntl(fd, F_GETFL, 0);
> >  	if (flags < 0) {
> >  		RTE_LOG(ERR, VHOST_CONFIG,
> > ------------------------------------------------------------------------------
> > ?
> > 
> > We will not check the EINPROGRESS, but subsequent 'connect()' will return
> > EISCONN if connection already established. getsockopt() is kept just in
> > case. Subsequent 'connect()' will happen on the next iteration of
> > reconnection cycle (1 second sleep).
> 
> I've sent v2 with this changes.

Thanks. But still, it doesn't look clean to me. I was thinking following
might be cleaner?

    diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c
    b/lib/librte_vhost/vhost_user/vhost-net-user.
    index f0f92f8..c0ef290 100644
    --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
    +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
    @@ -532,6 +532,10 @@ vhost_user_client_reconnect(void *arg __rte_unused)
                         reconn != NULL; reconn = next) {
                            next = TAILQ_NEXT(reconn, next);
    
    +                       if (reconn->conn_inprogress) {
    +                               /* do connect check here */
    +                       }
    +
                            if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
                                        sizeof(reconn->un)) < 0)
                                    continue;
    @@ -605,6 +609,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
            reconn->un = un;
            reconn->fd = fd;
            reconn->vsocket = vsocket;
    +       reconn->conn_inprogress = errno == EINPROGRESS;
            pthread_mutex_lock(&reconn_list.mutex);
            TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
            pthread_mutex_unlock(&reconn_list.mutex);

It's just a rough diff, hopefully it shows my idea clearly. And of
course, we should not call connect() anymore when conn_inprogress
is set.

What do you think of it?

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 11:40           ` Yuanhan Liu
@ 2016-07-21 12:10             ` Ilya Maximets
  2016-07-21 12:13               ` Ilya Maximets
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 12:10 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On 21.07.2016 14:40, Yuanhan Liu wrote:
> On Thu, Jul 21, 2016 at 02:14:59PM +0300, Ilya Maximets wrote:
>>> Hmm, how about this fixup:
>>> ------------------------------------------------------------------------------
>>> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
>>> index 8626d13..b0f45e6 100644
>>> --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
>>> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
>>> @@ -537,18 +537,7 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>>>  	errno = EINVAL;
>>>  
>>>  	ret = connect(fd, un, sz);
>>> -	if (ret == -1 && errno != EINPROGRESS)
>>> -		return -1;
>>> -	if (ret == 0)
>>> -		goto connected;
>>> -
>>> -	FD_ZERO(&fdset);
>>> -	FD_SET(fd, &fdset);
>>> -
>>> -	ret = select(fd + 1, NULL, &fdset, NULL, &tv);
>>> -	if (!ret)
>>> -		errno = ETIMEDOUT;
>>> -	if (ret != 1)
>>> +	if (ret < 0 && errno != EISCONN)
>>>  		return -1;
>>>  
>>>  	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
>>> @@ -558,7 +547,6 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>>>  		return -1;
>>>  	}
>>>  
>>> -connected:
>>>  	flags = fcntl(fd, F_GETFL, 0);
>>>  	if (flags < 0) {
>>>  		RTE_LOG(ERR, VHOST_CONFIG,
>>> ------------------------------------------------------------------------------
>>> ?
>>>
>>> We will not check the EINPROGRESS, but subsequent 'connect()' will return
>>> EISCONN if connection already established. getsockopt() is kept just in
>>> case. Subsequent 'connect()' will happen on the next iteration of
>>> reconnection cycle (1 second sleep).
>>
>> I've sent v2 with this changes.
> 
> Thanks. But still, it doesn't look clean to me. I was thinking following
> might be cleaner?
> 
>     diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c
>     b/lib/librte_vhost/vhost_user/vhost-net-user.
>     index f0f92f8..c0ef290 100644
>     --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
>     +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
>     @@ -532,6 +532,10 @@ vhost_user_client_reconnect(void *arg __rte_unused)
>                          reconn != NULL; reconn = next) {
>                             next = TAILQ_NEXT(reconn, next);
>     
>     +                       if (reconn->conn_inprogress) {
>     +                               /* do connect check here */
>     +                       }
>     +
>                             if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
>                                         sizeof(reconn->un)) < 0)
>                                     continue;
>     @@ -605,6 +609,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
>             reconn->un = un;
>             reconn->fd = fd;
>             reconn->vsocket = vsocket;
>     +       reconn->conn_inprogress = errno == EINPROGRESS;
>             pthread_mutex_lock(&reconn_list.mutex);
>             TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
>             pthread_mutex_unlock(&reconn_list.mutex);
> 
> It's just a rough diff, hopefully it shows my idea clearly. And of
> course, we should not call connect() anymore when conn_inprogress
> is set.
> 
> What do you think of it?

I found that we can't check connection status without select/poll
on it. 'getsockopt()' will return 0 with no errors if connection
is not still established just like if it was.
So, I think, the first version of this patch is the only
acceptable solution.

Best regards, Ilya Maximets.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 12:10             ` Ilya Maximets
@ 2016-07-21 12:13               ` Ilya Maximets
  2016-07-21 12:35                 ` Yuanhan Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 12:13 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon



On 21.07.2016 15:10, Ilya Maximets wrote:
> On 21.07.2016 14:40, Yuanhan Liu wrote:
>> On Thu, Jul 21, 2016 at 02:14:59PM +0300, Ilya Maximets wrote:
>>>> Hmm, how about this fixup:
>>>> ------------------------------------------------------------------------------
>>>> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
>>>> index 8626d13..b0f45e6 100644
>>>> --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
>>>> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
>>>> @@ -537,18 +537,7 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>>>>  	errno = EINVAL;
>>>>  
>>>>  	ret = connect(fd, un, sz);
>>>> -	if (ret == -1 && errno != EINPROGRESS)
>>>> -		return -1;
>>>> -	if (ret == 0)
>>>> -		goto connected;
>>>> -
>>>> -	FD_ZERO(&fdset);
>>>> -	FD_SET(fd, &fdset);
>>>> -
>>>> -	ret = select(fd + 1, NULL, &fdset, NULL, &tv);
>>>> -	if (!ret)
>>>> -		errno = ETIMEDOUT;
>>>> -	if (ret != 1)
>>>> +	if (ret < 0 && errno != EISCONN)
>>>>  		return -1;
>>>>  
>>>>  	ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
>>>> @@ -558,7 +547,6 @@ vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
>>>>  		return -1;
>>>>  	}
>>>>  
>>>> -connected:
>>>>  	flags = fcntl(fd, F_GETFL, 0);
>>>>  	if (flags < 0) {
>>>>  		RTE_LOG(ERR, VHOST_CONFIG,
>>>> ------------------------------------------------------------------------------
>>>> ?
>>>>
>>>> We will not check the EINPROGRESS, but subsequent 'connect()' will return
>>>> EISCONN if connection already established. getsockopt() is kept just in
>>>> case. Subsequent 'connect()' will happen on the next iteration of
>>>> reconnection cycle (1 second sleep).
>>>
>>> I've sent v2 with this changes.
>>
>> Thanks. But still, it doesn't look clean to me. I was thinking following
>> might be cleaner?
>>
>>     diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c
>>     b/lib/librte_vhost/vhost_user/vhost-net-user.
>>     index f0f92f8..c0ef290 100644
>>     --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
>>     +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
>>     @@ -532,6 +532,10 @@ vhost_user_client_reconnect(void *arg __rte_unused)
>>                          reconn != NULL; reconn = next) {
>>                             next = TAILQ_NEXT(reconn, next);
>>     
>>     +                       if (reconn->conn_inprogress) {
>>     +                               /* do connect check here */
>>     +                       }
>>     +
>>                             if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
>>                                         sizeof(reconn->un)) < 0)
>>                                     continue;
>>     @@ -605,6 +609,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
>>             reconn->un = un;
>>             reconn->fd = fd;
>>             reconn->vsocket = vsocket;
>>     +       reconn->conn_inprogress = errno == EINPROGRESS;
>>             pthread_mutex_lock(&reconn_list.mutex);
>>             TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
>>             pthread_mutex_unlock(&reconn_list.mutex);
>>
>> It's just a rough diff, hopefully it shows my idea clearly. And of
>> course, we should not call connect() anymore when conn_inprogress
>> is set.
>>
>> What do you think of it?
> 
> I found that we can't check connection status without select/poll
> on it. 'getsockopt()' will return 0 with no errors if connection
> is not still established just like if it was.
> So, I think, the first version of this patch is the only
> acceptable solution.

Sorry, v2 is acceptable too, because it always calls 'connect()'.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 12:13               ` Ilya Maximets
@ 2016-07-21 12:35                 ` Yuanhan Liu
  2016-07-21 12:42                   ` Ilya Maximets
  0 siblings, 1 reply; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 12:35 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 03:13:14PM +0300, Ilya Maximets wrote:
> >>
> >> What do you think of it?
> > 
> > I found that we can't check connection status without select/poll
> > on it. 'getsockopt()' will return 0 with no errors if connection
> > is not still established just like if it was.
> > So, I think, the first version of this patch is the only
> > acceptable solution.
> 
> Sorry, v2 is acceptable too, because it always calls 'connect()'.

So you have done the test that it works? I'm more curious to know
could your above case hit the getsockopt() code path, I mean, the
path that errno is set to EINPROGRESS or EISCONN?

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 12:35                 ` Yuanhan Liu
@ 2016-07-21 12:42                   ` Ilya Maximets
  2016-07-21 12:58                     ` Yuanhan Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 12:42 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On 21.07.2016 15:35, Yuanhan Liu wrote:
> On Thu, Jul 21, 2016 at 03:13:14PM +0300, Ilya Maximets wrote:
>>>>
>>>> What do you think of it?
>>>
>>> I found that we can't check connection status without select/poll
>>> on it. 'getsockopt()' will return 0 with no errors if connection
>>> is not still established just like if it was.
>>> So, I think, the first version of this patch is the only
>>> acceptable solution.
>>
>> Sorry, v2 is acceptable too, because it always calls 'connect()'.
> 
> So you have done the test that it works?

No, it's just theory. I don't know how to test this.

> I'm more curious to know
> could your above case hit the getsockopt() code path, I mean, the
> path that errno is set to EINPROGRESS or EISCONN?

As I already told, I don't sure that we're able to get EINPROGRESS
on our AF_UNIX sockets.
In v2 'getsockopt()' check is unnecessary.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 12:58                     ` Yuanhan Liu
@ 2016-07-21 12:58                       ` Ilya Maximets
  2016-07-21 13:10                         ` Yuanhan Liu
  0 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 12:58 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On 21.07.2016 15:58, Yuanhan Liu wrote:
> On Thu, Jul 21, 2016 at 03:42:54PM +0300, Ilya Maximets wrote:
>> On 21.07.2016 15:35, Yuanhan Liu wrote:
>>> On Thu, Jul 21, 2016 at 03:13:14PM +0300, Ilya Maximets wrote:
>>>>>>
>>>>>> What do you think of it?
>>>>>
>>>>> I found that we can't check connection status without select/poll
>>>>> on it. 'getsockopt()' will return 0 with no errors if connection
>>>>> is not still established just like if it was.
>>>>> So, I think, the first version of this patch is the only
>>>>> acceptable solution.
>>>>
>>>> Sorry, v2 is acceptable too, because it always calls 'connect()'.
>>>
>>> So you have done the test that it works?
>>
>> No, it's just theory. I don't know how to test this.
>>
>>> I'm more curious to know
>>> could your above case hit the getsockopt() code path, I mean, the
>>> path that errno is set to EINPROGRESS or EISCONN?
>>
>> As I already told, I don't sure that we're able to get EINPROGRESS
>> on our AF_UNIX sockets.
>> In v2 'getsockopt()' check is unnecessary.
> 
> We then have no reason to keep it?

You want me to re-send v2 without 'getsockopt()' check?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 12:42                   ` Ilya Maximets
@ 2016-07-21 12:58                     ` Yuanhan Liu
  2016-07-21 12:58                       ` Ilya Maximets
  0 siblings, 1 reply; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 12:58 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 03:42:54PM +0300, Ilya Maximets wrote:
> On 21.07.2016 15:35, Yuanhan Liu wrote:
> > On Thu, Jul 21, 2016 at 03:13:14PM +0300, Ilya Maximets wrote:
> >>>>
> >>>> What do you think of it?
> >>>
> >>> I found that we can't check connection status without select/poll
> >>> on it. 'getsockopt()' will return 0 with no errors if connection
> >>> is not still established just like if it was.
> >>> So, I think, the first version of this patch is the only
> >>> acceptable solution.
> >>
> >> Sorry, v2 is acceptable too, because it always calls 'connect()'.
> > 
> > So you have done the test that it works?
> 
> No, it's just theory. I don't know how to test this.
> 
> > I'm more curious to know
> > could your above case hit the getsockopt() code path, I mean, the
> > path that errno is set to EINPROGRESS or EISCONN?
> 
> As I already told, I don't sure that we're able to get EINPROGRESS
> on our AF_UNIX sockets.
> In v2 'getsockopt()' check is unnecessary.

We then have no reason to keep it?

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: fix connect hang in client mode
  2016-07-21 12:58                       ` Ilya Maximets
@ 2016-07-21 13:10                         ` Yuanhan Liu
  0 siblings, 0 replies; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 13:10 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 03:58:11PM +0300, Ilya Maximets wrote:
> On 21.07.2016 15:58, Yuanhan Liu wrote:
> > On Thu, Jul 21, 2016 at 03:42:54PM +0300, Ilya Maximets wrote:
> >> On 21.07.2016 15:35, Yuanhan Liu wrote:
> >>> On Thu, Jul 21, 2016 at 03:13:14PM +0300, Ilya Maximets wrote:
> >>>>>>
> >>>>>> What do you think of it?
> >>>>>
> >>>>> I found that we can't check connection status without select/poll
> >>>>> on it. 'getsockopt()' will return 0 with no errors if connection
> >>>>> is not still established just like if it was.
> >>>>> So, I think, the first version of this patch is the only
> >>>>> acceptable solution.
> >>>>
> >>>> Sorry, v2 is acceptable too, because it always calls 'connect()'.
> >>>
> >>> So you have done the test that it works?
> >>
> >> No, it's just theory. I don't know how to test this.
> >>
> >>> I'm more curious to know
> >>> could your above case hit the getsockopt() code path, I mean, the
> >>> path that errno is set to EINPROGRESS or EISCONN?
> >>
> >> As I already told, I don't sure that we're able to get EINPROGRESS
> >> on our AF_UNIX sockets.
> >> In v2 'getsockopt()' check is unnecessary.
> > 
> > We then have no reason to keep it?
> 
> You want me to re-send v2 without 'getsockopt()' check?

Yes, because I'm not sure it will work without select or poll.

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [dpdk-dev] [PATCH v3] vhost: fix connect hang in client mode
  2016-07-21  8:21 [dpdk-dev] [PATCH] vhost: fix connect hang in client mode Ilya Maximets
  2016-07-21  9:37 ` Yuanhan Liu
  2016-07-21 11:12 ` [dpdk-dev] [PATCH v2] " Ilya Maximets
@ 2016-07-21 13:19 ` Ilya Maximets
  2016-07-21 13:35   ` Yuanhan Liu
  2 siblings, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 13:19 UTC (permalink / raw)
  To: dev, Huawei Xie, Yuanhan Liu
  Cc: Dyasly Sergey, Heetae Ahn, Thomas Monjalon, Ilya Maximets

If something abnormal happened to QEMU, 'connect()' can block calling
thread (e.g. main thread of OVS) forever or for a really long time.
This can break whole application or block the reconnection thread.

Example with OVS:

	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
	(gdb) bt
	#0  connect () from /lib64/libpthread.so.0
	#1  vhost_user_create_client (vsocket=0xa816e0)
	#2  rte_vhost_driver_register
	#3  netdev_dpdk_vhost_user_construct
	#4  netdev_open (name=0xa664b0 "vhost1")
	[...]
	#11 main

Fix that by setting non-blocking mode for client sockets for connection.

Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
This was reproduced with current QEMU master branch
(commit 1ecfb24da987b862f) + patch-set "vhost-user reconnect fixes"
(https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01547.html).

OVS was patched to support client mode:
http://openvswitch.org/pipermail/dev/2016-July/074972.html

Following script forces QEMU to fail to initialize vhost because
disconnection occures while device not fully configured:

	while true
	do
		ovs-vsctl set Interface vhost1 ofport_request=125
		ovs-vsctl set Interface vhost1 ofport_request=126
	done

As a result: QEMU still works, network interface broken and OVS main
             thread stalled inside 'connect()'.

Version 3:
	* Removed unnecessary 'getsockopt()' check.

Version 2:
	* EINPROGRESS not checked. EISCONN checked instead on
	  the next iteration of reconnection loop.



 lib/librte_vhost/vhost_user/vhost-net-user.c | 52 +++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 62c5ec3..b35594d 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -43,6 +43,7 @@
 #include <sys/un.h>
 #include <sys/queue.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <pthread.h>
 
 #include <rte_log.h>
@@ -449,6 +450,14 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
 	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
 		is_server ? "server" : "client", fd);
 
+	if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost-user: can't set nonblocking mode for socket, fd: "
+			"%d (%s)\n", fd, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
 	memset(un, 0, sizeof(*un));
 	un->sun_family = AF_UNIX;
 	strncpy(un->sun_path, path, sizeof(un->sun_path));
@@ -516,9 +525,33 @@ struct vhost_user_reconnect_list {
 static struct vhost_user_reconnect_list reconn_list;
 static pthread_t reconn_tid;
 
+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+	int ret, flags;
+
+	ret = connect(fd, un, sz);
+	if (ret < 0 && errno != EISCONN)
+		return -1;
+
+	flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"can't get flags for connfd %d\n", fd);
+		return -2;
+	}
+	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"can't disable nonblocking on fd %d\n", fd);
+		return -2;
+	}
+	return 0;
+}
+
 static void *
 vhost_user_client_reconnect(void *arg __rte_unused)
 {
+	int ret;
 	struct vhost_user_reconnect *reconn, *next;
 
 	while (1) {
@@ -532,13 +565,23 @@ vhost_user_client_reconnect(void *arg __rte_unused)
 		     reconn != NULL; reconn = next) {
 			next = TAILQ_NEXT(reconn, next);
 
-			if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
-				    sizeof(reconn->un)) < 0)
+			ret = vhost_user_connect_nonblock(reconn->fd,
+						(struct sockaddr *)&reconn->un,
+						sizeof(reconn->un));
+			if (ret == -2) {
+				close(reconn->fd);
+				RTE_LOG(ERR, VHOST_CONFIG,
+					"reconnection for fd %d failed\n",
+					reconn->fd);
+				goto remove_fd;
+			}
+			if (ret == -1)
 				continue;
 
 			RTE_LOG(INFO, VHOST_CONFIG,
 				"%s: connected\n", reconn->vsocket->path);
 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
+remove_fd:
 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
 			free(reconn);
 		}
@@ -579,7 +622,8 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
 	if (fd < 0)
 		return -1;
 
-	ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+					  sizeof(un));
 	if (ret == 0) {
 		vhost_user_add_connection(fd, vsocket);
 		return 0;
@@ -589,7 +633,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
 		"failed to connect to %s: %s\n",
 		path, strerror(errno));
 
-	if (!vsocket->reconnect) {
+	if (ret == -2 || !vsocket->reconnect) {
 		close(fd);
 		return -1;
 	}
-- 
2.7.4

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] vhost: fix connect hang in client mode
  2016-07-21 13:19 ` [dpdk-dev] [PATCH v3] " Ilya Maximets
@ 2016-07-21 13:35   ` Yuanhan Liu
  2016-07-21 13:43     ` Ilya Maximets
  2016-07-21 22:21     ` Thomas Monjalon
  0 siblings, 2 replies; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 13:35 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 04:19:35PM +0300, Ilya Maximets wrote:
> If something abnormal happened to QEMU, 'connect()' can block calling
> thread (e.g. main thread of OVS) forever or for a really long time.
> This can break whole application or block the reconnection thread.
> 
> Example with OVS:
> 
> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
> 	(gdb) bt
> 	#0  connect () from /lib64/libpthread.so.0
> 	#1  vhost_user_create_client (vsocket=0xa816e0)
> 	#2  rte_vhost_driver_register
> 	#3  netdev_dpdk_vhost_user_construct
> 	#4  netdev_open (name=0xa664b0 "vhost1")
> 	[...]
> 	#11 main
> 
> Fix that by setting non-blocking mode for client sockets for connection.
> 
> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
> 
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>

Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>

One help I'd like to ask is that I'd appriciate if you could do the test
to make sure that your 2 (latest) patches fix the two issues you reported.

You might have already done that; I just want to make sure.

Thanks.

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] vhost: fix connect hang in client mode
  2016-07-21 13:35   ` Yuanhan Liu
@ 2016-07-21 13:43     ` Ilya Maximets
  2016-07-21 13:56       ` Yuanhan Liu
  2016-07-21 22:21     ` Thomas Monjalon
  1 sibling, 1 reply; 20+ messages in thread
From: Ilya Maximets @ 2016-07-21 13:43 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon



On 21.07.2016 16:35, Yuanhan Liu wrote:
> On Thu, Jul 21, 2016 at 04:19:35PM +0300, Ilya Maximets wrote:
>> If something abnormal happened to QEMU, 'connect()' can block calling
>> thread (e.g. main thread of OVS) forever or for a really long time.
>> This can break whole application or block the reconnection thread.
>>
>> Example with OVS:
>>
>> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
>> 	(gdb) bt
>> 	#0  connect () from /lib64/libpthread.so.0
>> 	#1  vhost_user_create_client (vsocket=0xa816e0)
>> 	#2  rte_vhost_driver_register
>> 	#3  netdev_dpdk_vhost_user_construct
>> 	#4  netdev_open (name=0xa664b0 "vhost1")
>> 	[...]
>> 	#11 main
>>
>> Fix that by setting non-blocking mode for client sockets for connection.
>>
>> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
>>
>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> 
> Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> 
> One help I'd like to ask is that I'd appriciate if you could do the test
> to make sure that your 2 (latest) patches fix the two issues you reported.
> 
> You might have already done that; I just want to make sure.

I've performed the test with 'ofport_request' script before sending patches.
And currently test still works. No leaks of descriptors, no hangs,
no QEMU crashes observed.
Sometimes network device breaks on QEMU side, but it's QEMU issue. In this
case I'm receiving following message from DPDK's vhost:

VHOST_CONFIG: vhost-user client: socket created, fd: 28
VHOST_CONFIG: failed to connect to /vhost1: Resource temporarily unavailable
VHOST_CONFIG: /vhost1: reconnecting...

Before the 'hang' patch there was hang of main thread.

After QEMU restart all works normally. OVS restart not required.

Best regards, Ilya Maximets.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] vhost: fix connect hang in client mode
  2016-07-21 13:43     ` Ilya Maximets
@ 2016-07-21 13:56       ` Yuanhan Liu
  0 siblings, 0 replies; 20+ messages in thread
From: Yuanhan Liu @ 2016-07-21 13:56 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: dev, Huawei Xie, Dyasly Sergey, Heetae Ahn, Thomas Monjalon

On Thu, Jul 21, 2016 at 04:43:25PM +0300, Ilya Maximets wrote:
> 
> 
> On 21.07.2016 16:35, Yuanhan Liu wrote:
> > On Thu, Jul 21, 2016 at 04:19:35PM +0300, Ilya Maximets wrote:
> >> If something abnormal happened to QEMU, 'connect()' can block calling
> >> thread (e.g. main thread of OVS) forever or for a really long time.
> >> This can break whole application or block the reconnection thread.
> >>
> >> Example with OVS:
> >>
> >> 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
> >> 	(gdb) bt
> >> 	#0  connect () from /lib64/libpthread.so.0
> >> 	#1  vhost_user_create_client (vsocket=0xa816e0)
> >> 	#2  rte_vhost_driver_register
> >> 	#3  netdev_dpdk_vhost_user_construct
> >> 	#4  netdev_open (name=0xa664b0 "vhost1")
> >> 	[...]
> >> 	#11 main
> >>
> >> Fix that by setting non-blocking mode for client sockets for connection.
> >>
> >> Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
> >>
> >> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> > 
> > Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > 
> > One help I'd like to ask is that I'd appriciate if you could do the test
> > to make sure that your 2 (latest) patches fix the two issues you reported.
> > 
> > You might have already done that; I just want to make sure.
> 
> I've performed the test with 'ofport_request' script before sending patches.
> And currently test still works. No leaks of descriptors, no hangs,
> no QEMU crashes observed.
> Sometimes network device breaks on QEMU side, but it's QEMU issue. In this
> case I'm receiving following message from DPDK's vhost:
> 
> VHOST_CONFIG: vhost-user client: socket created, fd: 28
> VHOST_CONFIG: failed to connect to /vhost1: Resource temporarily unavailable
> VHOST_CONFIG: /vhost1: reconnecting...
> 
> Before the 'hang' patch there was hang of main thread.
> 
> After QEMU restart all works normally. OVS restart not required.

Good to know and appreciate that!

	--yliu

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] vhost: fix connect hang in client mode
  2016-07-21 13:35   ` Yuanhan Liu
  2016-07-21 13:43     ` Ilya Maximets
@ 2016-07-21 22:21     ` Thomas Monjalon
  1 sibling, 0 replies; 20+ messages in thread
From: Thomas Monjalon @ 2016-07-21 22:21 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: Yuanhan Liu, dev, Huawei Xie, Dyasly Sergey, Heetae Ahn

2016-07-21 21:35, Yuanhan Liu:
> On Thu, Jul 21, 2016 at 04:19:35PM +0300, Ilya Maximets wrote:
> > If something abnormal happened to QEMU, 'connect()' can block calling
> > thread (e.g. main thread of OVS) forever or for a really long time.
> > This can break whole application or block the reconnection thread.
> > 
> > Example with OVS:
> > 
> > 	ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
> > 	(gdb) bt
> > 	#0  connect () from /lib64/libpthread.so.0
> > 	#1  vhost_user_create_client (vsocket=0xa816e0)
> > 	#2  rte_vhost_driver_register
> > 	#3  netdev_dpdk_vhost_user_construct
> > 	#4  netdev_open (name=0xa664b0 "vhost1")
> > 	[...]
> > 	#11 main
> > 
> > Fix that by setting non-blocking mode for client sockets for connection.
> > 
> > Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")
> > 
> > Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> 
> Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>

Applied, thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2016-07-21 22:21 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-21  8:21 [dpdk-dev] [PATCH] vhost: fix connect hang in client mode Ilya Maximets
2016-07-21  9:37 ` Yuanhan Liu
2016-07-21  9:45   ` Ilya Maximets
2016-07-21 10:13     ` Yuanhan Liu
2016-07-21 10:37       ` Ilya Maximets
2016-07-21 11:14         ` Ilya Maximets
2016-07-21 11:40           ` Yuanhan Liu
2016-07-21 12:10             ` Ilya Maximets
2016-07-21 12:13               ` Ilya Maximets
2016-07-21 12:35                 ` Yuanhan Liu
2016-07-21 12:42                   ` Ilya Maximets
2016-07-21 12:58                     ` Yuanhan Liu
2016-07-21 12:58                       ` Ilya Maximets
2016-07-21 13:10                         ` Yuanhan Liu
2016-07-21 11:12 ` [dpdk-dev] [PATCH v2] " Ilya Maximets
2016-07-21 13:19 ` [dpdk-dev] [PATCH v3] " Ilya Maximets
2016-07-21 13:35   ` Yuanhan Liu
2016-07-21 13:43     ` Ilya Maximets
2016-07-21 13:56       ` Yuanhan Liu
2016-07-21 22:21     ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).