When rte_vhost_driver_unregister deletes the connection fd, fdset_try_del will keep retrying and does not release vhost_user.mutex if the fd is busy, but fdset_event_dispatch will set the fd to busy and call vhost_user_msg_handler to get vhost_user.mutex, which will cause a deadlock. To fix it: Unlock vhost_user.mutex if fdset_try_del fails and relock it when retrying. --- lib/librte_vhost/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 9cf34ad17..a9effa115 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -961,13 +961,12 @@ rte_vhost_driver_unregister(const char *path) int count; struct vhost_user_connection *conn, *next; +again: pthread_mutex_lock(&vhost_user.mutex); for (i = 0; i < vhost_user.vsocket_cnt; i++) { struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; - if (!strcmp(vsocket->path, path)) { -again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; @@ -981,6 +980,7 @@ rte_vhost_driver_unregister(const char *path) */ if (fdset_try_del(&vhost_user.fdset, conn->connfd) == -1) { + pthread_mutex_unlock(&vhost_user.mutex); pthread_mutex_unlock( &vsocket->conn_mutex); goto again; -- 2.14.3 (Apple Git-98)
When rte_vhost_driver_unregister deletes the connection fd, fdset_try_del will keep retrying and does not release vhost_user.mutex if the fd is busy, but fdset_event_dispatch will set the fd to busy and call vhost_user_msg_handler to get vhost_user.mutex, which will cause a deadlock. Unlock vhost_user.mutex if fdset_try_del fails and relock it when retrying. Signed-off-by: findtheonlway <findtheonlyway@gmail.com> Signed-off-by: sunwenjie <sunwenjie@didichuxing.com> --- lib/librte_vhost/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 9cf34ad17..a9effa115 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -961,13 +961,12 @@ rte_vhost_driver_unregister(const char *path) int count; struct vhost_user_connection *conn, *next; +again: pthread_mutex_lock(&vhost_user.mutex); for (i = 0; i < vhost_user.vsocket_cnt; i++) { struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; - if (!strcmp(vsocket->path, path)) { -again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; @@ -981,6 +980,7 @@ rte_vhost_driver_unregister(const char *path) */ if (fdset_try_del(&vhost_user.fdset, conn->connfd) == -1) { + pthread_mutex_unlock(&vhost_user.mutex); pthread_mutex_unlock( &vsocket->conn_mutex); goto again; -- 2.20.1
Hi Sunwenjie, Thanks for your patch. Please resend following the contribution guidelines, which can be found in doc/guides/contributing/. First, the commit message prefix should be vhost. For other comments, please see inline: On 1/8/19 12:45 PM, sunwenjie wrote: > When rte_vhost_driver_unregister delete the connection fd, fdset_try_del will > always try and donot release the vhostuser.mutex if the fd is busy, but the > fdset_event_dispatch will set the fd to busy and call vhost_user_msg_handler > to get vhostuser.mutex, which will cause deadlock. > The commit message should not be indented, and should be wrapped at 72 chars. > To fix it: > Unlock the vhost_user.mutex if fdset_try_del fail and relock it when > retry. You need to add your Signed-off-by with your full name. > --- > lib/librte_vhost/socket.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c > index 9cf34ad17..a9effa115 100644 > --- a/lib/librte_vhost/socket.c > +++ b/lib/librte_vhost/socket.c > @@ -961,13 +961,12 @@ rte_vhost_driver_unregister(const char *path) > int count; > struct vhost_user_connection *conn, *next; > > +again: > pthread_mutex_lock(&vhost_user.mutex); > > for (i = 0; i < vhost_user.vsocket_cnt; i++) { > struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; > - Keep this blank line; removing it is out of the scope of the change. > if (!strcmp(vsocket->path, path)) { > -again: > pthread_mutex_lock(&vsocket->conn_mutex); > for (conn = TAILQ_FIRST(&vsocket->conn_list); > conn != NULL; > @@ -981,6 +980,7 @@ rte_vhost_driver_unregister(const char *path) > */ > if (fdset_try_del(&vhost_user.fdset, > conn->connfd) == -1) { > + pthread_mutex_unlock(&vhost_user.mutex); > pthread_mutex_unlock( > &vsocket->conn_mutex); > goto again; > Thanks, Maxime
Will do, thanks!
Maxime Coquelin <maxime.coquelin@redhat.com> 于2019年1月10日周四 下午10:48写道:
> Hi Sunwenjie,
>
> Thanks for your patch.
>
> Please resend following the contribution guidelines, which can be
> found in doc/guides/contributing/.
>
> First, the commit message prefix should be vhost.
> For other comments, please see inline:
>
>
> On 1/8/19 12:45 PM, sunwenjie wrote:
> > When rte_vhost_driver_unregister delete the connection fd, fdset_try_del will
> > always try and donot release the vhostuser.mutex if the fd is busy, but the
> > fdset_event_dispatch will set the fd to busy and call vhost_user_msg_handler
> > to get vhostuser.mutex, which will cause deadlock.
> >
>
> The commit message should not be indented, and should be wrapped at 72 chars.
>
> > To fix it:
> > Unlock the vhost_user.mutex if fdset_try_del fail and relock it when
> > retry.
>
> You need to add your Signed-off-by with your full name.
>
> > ---
> > lib/librte_vhost/socket.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
> > index 9cf34ad17..a9effa115 100644
> > --- a/lib/librte_vhost/socket.c
> > +++ b/lib/librte_vhost/socket.c
> > @@ -961,13 +961,12 @@ rte_vhost_driver_unregister(const char *path)
> > int count;
> > struct vhost_user_connection *conn, *next;
> >
> > +again:
> > pthread_mutex_lock(&vhost_user.mutex);
> >
> > for (i = 0; i < vhost_user.vsocket_cnt; i++) {
> > struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
> > -
>
> Keep this blank line; removing it is out of the scope of the change.
>
> > if (!strcmp(vsocket->path, path)) {
> > -again:
> > pthread_mutex_lock(&vsocket->conn_mutex);
> > for (conn = TAILQ_FIRST(&vsocket->conn_list);
> > conn != NULL;
> > @@ -981,6 +980,7 @@ rte_vhost_driver_unregister(const char *path)
> > */
> > if (fdset_try_del(&vhost_user.fdset,
> > conn->connfd) == -1) {
> > + pthread_mutex_unlock(&vhost_user.mutex);
> > pthread_mutex_unlock(
> > &vsocket->conn_mutex);
> > goto again;
> >
>
> Thanks,
> Maxime
>