From mboxrd@z Thu Jan  1 00:00:00 1970
From: Victor Kaplansky <vkaplans@redhat.com>
To: dev@dpdk.org, yliu@fridaylinux.org, tiwei.bie@intel.com,
 jianfeng.tan@intel.com, vkaplans@redhat.com
Cc: stable@dpdk.org, jfreiman@redhat.com, Maxime Coquelin
Date: Wed, 6 Dec 2017 15:55:49 +0200
Message-ID: <20171206135329.652-1-vkaplans@redhat.com>
Subject: [dpdk-dev] [PATCH] vhost_user: protect active rings from async ring changes

When performing live migration or memory hot-plugging, the changes to
the device and vrings made by the message handler are done
independently of the vring usage by the PMD threads. This causes, for
example, segfaults during live migration with MQ enabled, but in
general virtually any request sent by QEMU that changes the state of
the device can cause problems.

This patch fixes all of the above issues by adding a spinlock to every
vring and requiring the message handler to start operating only after
ensuring that all PMD threads related to the device are outside the
critical sections accessing the vring data. Each vring has its own
lock in order to avoid contention between PMD threads of different
vrings and to prevent performance degradation when scaling the queue
pair number.
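For illustration, a minimal sketch of the locking protocol this patch
establishes (the helper names pmd_datapath() and handle_request() are
hypothetical, not part of the patch):

#include <rte_spinlock.h>

struct vring_state {
	rte_spinlock_t active_lock;
	/* ... vring data ... */
};

/* PMD thread: hold the per-vring lock across its critical section. */
static void
pmd_datapath(struct vring_state *vr)
{
	rte_spinlock_lock(&vr->active_lock);
	/* ... access vring data ... */
	rte_spinlock_unlock(&vr->active_lock);
}

/* Message handler: acquire every vring's lock before touching device
 * state, which guarantees no PMD thread is inside a critical section,
 * then release them all once the change is applied. */
static void
handle_request(struct vring_state **vrs, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		rte_spinlock_lock(&vrs[i]->active_lock);
	/* ... apply device/vring changes ... */
	for (i = 0; i < n; i++)
		rte_spinlock_unlock(&vrs[i]->active_lock);
}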
---
 lib/librte_vhost/vhost.h      |  1 +
 lib/librte_vhost/vhost_user.c | 44 +++++++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/virtio_net.c |  8 ++++++++
 3 files changed, 53 insertions(+)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 1cc81c17..812aeccd 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -137,6 +137,7 @@ struct vhost_virtqueue {
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
 	int				iotlb_cache_nr;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+	rte_spinlock_t	active_lock;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macros defined */
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index f4c7ce46..02a3a7d3 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1190,6 +1190,46 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
 	return alloc_vring_queue(dev, vring_idx);
 }
 
+static void
+vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
+{
+	unsigned int i = 0;
+	unsigned int vq_num = 0;
+
+	while (vq_num < dev->nr_vring) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (!vq) {
+			i++;
+			continue;
+		}
+
+		rte_spinlock_lock(&vq->active_lock);
+		vq_num++;
+		i++;
+	}
+}
+
+static void
+vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
+{
+	unsigned int i = 0;
+	unsigned int vq_num = 0;
+
+	while (vq_num < dev->nr_vring) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (!vq) {
+			i++;
+			continue;
+		}
+
+		rte_spinlock_unlock(&vq->active_lock);
+		vq_num++;
+		i++;
+	}
+}
+
 int
 vhost_user_msg_handler(int vid, int fd)
 {
@@ -1241,6 +1281,8 @@ vhost_user_msg_handler(int vid, int fd)
 		return -1;
 	}
 
+	vhost_user_lock_all_queue_pairs(dev);
+
 	switch (msg.request.master) {
 	case VHOST_USER_GET_FEATURES:
 		msg.payload.u64 = vhost_user_get_features(dev);
@@ -1342,6 +1384,8 @@ vhost_user_msg_handler(int vid, int fd)
 
 	}
 
+	vhost_user_unlock_all_queue_pairs(dev);
+
 	if (msg.flags & VHOST_USER_NEED_REPLY) {
 		msg.payload.u64 = !!ret;
 		msg.size = sizeof(msg.payload.u64);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 6fee16e5..de7e38bb 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -44,6 +44,7 @@
 #include <rte_udp.h>
 #include <rte_sctp.h>
 #include <rte_arp.h>
+#include <rte_spinlock.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -1180,9 +1181,12 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	rte_spinlock_lock(&vq->active_lock);
+
 	vq->batch_copy_nb_elems = 0;
 
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -1240,6 +1244,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
+			rte_spinlock_unlock(&vq->active_lock);
 			return 0;
 		}
 
@@ -1356,6 +1361,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_unlock(vq);
 
+	rte_spinlock_unlock(&vq->active_lock);
+
 	if (unlikely(rarp_mbuf != NULL)) {
 		/*
 		 * Inject it to the head of "pkts" array, so that switch's mac
@@ -1366,5 +1373,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		i += 1;
 	}
 
+
 	return i;
 }
-- 
Victor
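As a side note: the patch adds no explicit rte_spinlock_init() call and
appears to rely on the virtqueue being zeroed at allocation, which
leaves an rte_spinlock_t in the unlocked state. If explicit
initialization were preferred, a minimal sketch (hypothetical helper,
not part of the patch) could be:

#include <rte_spinlock.h>

/* Hypothetical: initialize the new lock wherever the vring is set up,
 * e.g. during vring (re)initialization. */
static void
init_active_lock(struct vhost_virtqueue *vq)
{
	rte_spinlock_init(&vq->active_lock);
}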