* [PATCH] vhost: fix data-plane access to released vq
@ 2021-12-03 16:34 Yuan Wang
2022-01-26 14:02 ` Maxime Coquelin
0 siblings, 1 reply; 5+ messages in thread
From: Yuan Wang @ 2021-12-03 16:34 UTC (permalink / raw)
To: maxime.coquelin, chenbo.xia
Cc: dev, jiayu.hu, xuan.ding, wenwux.ma, weix.ling
From: yuan wang <yuanx.wang@intel.com>
When NUMA reallocation occurs, numa_realloc() on the control
plane will free the old vq. If rte_vhost_dequeue_burst()
on the data plane gets the vq just before release, then it
will access the released vq. We need to put the
vq->access_lock into struct virtio_net to ensure that it
can prevent this situation.
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
---
lib/vhost/vhost.c | 26 +++++++++++++-------------
lib/vhost/vhost.h | 4 +---
lib/vhost/vhost_user.c | 4 ++--
lib/vhost/virtio_net.c | 16 ++++++++--------
4 files changed, 24 insertions(+), 26 deletions(-)
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 13a9bb9dd1..4259931be9 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -627,7 +627,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
dev->virtqueue[i] = vq;
init_vring_queue(dev, i);
- rte_spinlock_init(&vq->access_lock);
+ rte_spinlock_init(&dev->vq_access_lock[i]);
vq->avail_wrap_counter = 1;
vq->used_wrap_counter = 1;
vq->signalled_used_valid = false;
@@ -1325,7 +1325,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
if (!vq)
return 0;
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[queue_id]);
if (unlikely(!vq->enabled || vq->avail == NULL))
goto out;
@@ -1333,7 +1333,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
out:
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return ret;
}
@@ -1417,12 +1417,12 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
if (!vq)
return -1;
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[queue_id]);
vq->notif_enable = enable;
ret = vhost_enable_guest_notification(dev, vq, enable);
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return ret;
}
@@ -1479,7 +1479,7 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid)
if (vq == NULL)
return 0;
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[qid]);
if (unlikely(!vq->enabled || vq->avail == NULL))
goto out;
@@ -1487,7 +1487,7 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid)
ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
out:
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[qid]);
return ret;
}
@@ -1721,9 +1721,9 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id,
ops->transfer_data == NULL))
return -1;
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[queue_id]);
ret = async_channel_register(vid, queue_id, ops);
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return ret;
}
@@ -1784,7 +1784,7 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
if (!vq->async)
return ret;
- if (!rte_spinlock_trylock(&vq->access_lock)) {
+ if (!rte_spinlock_trylock(&dev->vq_access_lock[queue_id])) {
VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
"virt queue busy.\n");
return -1;
@@ -1799,7 +1799,7 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
vhost_free_async_mem(vq);
out:
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return ret;
}
@@ -1856,14 +1856,14 @@ rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
if (!vq->async)
return ret;
- if (!rte_spinlock_trylock(&vq->access_lock)) {
+ if (!rte_spinlock_trylock(&dev->vq_access_lock[queue_id])) {
VHOST_LOG_CONFIG(DEBUG, "Failed to check in-flight packets. "
"virt queue busy.\n");
return ret;
}
ret = vq->async->pkts_inflight_n;
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return ret;
}
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 7085e0885c..f85ce4fda5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -185,9 +185,6 @@ struct vhost_virtqueue {
bool access_ok;
bool ready;
- rte_spinlock_t access_lock;
-
-
union {
struct vring_used_elem *shadow_used_split;
struct vring_used_elem_packed *shadow_used_packed;
@@ -384,6 +381,7 @@ struct virtio_net {
int extbuf;
int linearbuf;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
+ rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS * 2];
struct inflight_mem_info *inflight_info;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
char ifname[IF_NAME_SZ];
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346c4d..305b4059bb 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2899,7 +2899,7 @@ vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
struct vhost_virtqueue *vq = dev->virtqueue[i];
if (vq) {
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[i]);
vq_num++;
}
i++;
@@ -2916,7 +2916,7 @@ vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
struct vhost_virtqueue *vq = dev->virtqueue[i];
if (vq) {
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[i]);
vq_num++;
}
i++;
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index b3d954aab4..c5a05292ab 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1354,7 +1354,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
vq = dev->virtqueue[queue_id];
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[queue_id]);
if (unlikely(!vq->enabled))
goto out_access_unlock;
@@ -1380,7 +1380,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
vhost_user_iotlb_rd_unlock(vq);
out_access_unlock:
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return nb_tx;
}
@@ -1906,11 +1906,11 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
return 0;
}
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[queue_id]);
n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return n_pkts_cpl;
}
@@ -1962,7 +1962,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
vq = dev->virtqueue[queue_id];
- rte_spinlock_lock(&vq->access_lock);
+ rte_spinlock_lock(&dev->vq_access_lock[queue_id]);
if (unlikely(!vq->enabled || !vq->async))
goto out_access_unlock;
@@ -1990,7 +1990,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
vhost_user_iotlb_rd_unlock(vq);
out_access_unlock:
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
return nb_tx;
}
@@ -2900,7 +2900,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vq = dev->virtqueue[queue_id];
- if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ if (unlikely(rte_spinlock_trylock(&dev->vq_access_lock[queue_id]) == 0))
return 0;
if (unlikely(!vq->enabled)) {
@@ -2969,7 +2969,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vhost_user_iotlb_rd_unlock(vq);
out_access_unlock:
- rte_spinlock_unlock(&vq->access_lock);
+ rte_spinlock_unlock(&dev->vq_access_lock[queue_id]);
if (unlikely(rarp_mbuf != NULL))
count += 1;
--
2.25.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] vhost: fix data-plane access to released vq
2021-12-03 16:34 [PATCH] vhost: fix data-plane access to released vq Yuan Wang
@ 2022-01-26 14:02 ` Maxime Coquelin
2022-01-27 10:30 ` Wang, YuanX
0 siblings, 1 reply; 5+ messages in thread
From: Maxime Coquelin @ 2022-01-26 14:02 UTC (permalink / raw)
To: Yuan Wang, chenbo.xia; +Cc: dev, jiayu.hu, xuan.ding, wenwux.ma, weix.ling
Hi Yuan,
On 12/3/21 17:34, Yuan Wang wrote:
> From: yuan wang <yuanx.wang@intel.com>
>
> When NUMA reallocation occurs, numa_realloc() on the control
> plane will free the old vq. If rte_vhost_dequeue_burst()
> on the data plane gets the vq just before release, then it
> will access the released vq. We need to put the
> vq->access_lock into struct virtio_net to ensure that it
> can prevent this situation.
This patch is a fix, so the Fixes tag would be needed.
But are you really facing this issue, or this is just based on code
review?
Currently NUMA reallocation is called whenever
translate_ring_addresses() is called.
translate_ring_addresses() is primarily called at device initialization,
before the .new_device() callback is called. At that stage, there is no
risk to perform a NUMA reallocation as the application is not expected to
use APIs requiring vq->access_lock acquisition.
But I agree there are possibilities that numa_realloc() gets called
while device is in running state. But even if that happened, I don't
think it is possible that numa_realloc() ends-up reallocating the
virtqueue on a different NUMA node (the vring should not have moved from
a physical memory standpoint). And even if it happened, we should be
safe because we ensure the VQ was not ready (so not usable by the
application) before proceeding with reallocation:
static struct virtio_net*
numa_realloc(struct virtio_net *dev, int index)
{
int node, dev_node;
struct virtio_net *old_dev;
struct vhost_virtqueue *vq;
struct batch_copy_elem *bce;
struct guest_page *gp;
struct rte_vhost_memory *mem;
size_t mem_size;
int ret;
old_dev = dev;
vq = dev->virtqueue[index];
/*
* If VQ is ready, it is too late to reallocate, it certainly already
* happened anyway on VHOST_USER_SET_VRING_ADRR.
*/
if (vq->ready)
return dev;
So, if this is fixing a real issue, I would need more details on the
issue in order to understand why vq->ready was not set when it should
have been.
On a side note, while trying to understand how you could face an issue,
I noticed that translate_ring_addresses() may be called by
vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as
this is the handler for VHOST_USER_IOTLB_MSG. We may want to protect
translate_ring_addresses() calls with locking the VQ locks. I will post
a fix for it.
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
> lib/vhost/vhost.c | 26 +++++++++++++-------------
> lib/vhost/vhost.h | 4 +---
> lib/vhost/vhost_user.c | 4 ++--
> lib/vhost/virtio_net.c | 16 ++++++++--------
> 4 files changed, 24 insertions(+), 26 deletions(-)
>
...
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 7085e0885c..f85ce4fda5 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -185,9 +185,6 @@ struct vhost_virtqueue {
> bool access_ok;
> bool ready;
>
> - rte_spinlock_t access_lock;
> -
> -
> union {
> struct vring_used_elem *shadow_used_split;
> struct vring_used_elem_packed *shadow_used_packed;
> @@ -384,6 +381,7 @@ struct virtio_net {
> int extbuf;
> int linearbuf;
> struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS * 2];
The problem here is that you'll be introducing false sharing, so I
expect performance to no more scale with the number of queues.
It also consumes unnecessary memory.
> struct inflight_mem_info *inflight_info;
> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
> char ifname[IF_NAME_SZ];
Thanks,
Maxime
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] vhost: fix data-plane access to released vq
2022-01-26 14:02 ` Maxime Coquelin
@ 2022-01-27 10:30 ` Wang, YuanX
2022-01-27 10:46 ` Maxime Coquelin
0 siblings, 1 reply; 5+ messages in thread
From: Wang, YuanX @ 2022-01-27 10:30 UTC (permalink / raw)
To: Maxime Coquelin, Xia, Chenbo
Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Ling, WeiX
Hi Maxime,
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, January 26, 2022 10:03 PM
> To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling,
> WeiX <weix.ling@intel.com>
> Subject: Re: [PATCH] vhost: fix data-plane access to released vq
>
> Hi Yuan,
>
> On 12/3/21 17:34, Yuan Wang wrote:
> > From: yuan wang <yuanx.wang@intel.com>
> >
> > When NUMA reallocation occurs, numa_realloc() on the control plane will
> > free the old vq. If rte_vhost_dequeue_burst() on the data plane gets
> > the vq just before release, then it will access the released vq. We
> > need to put the
> > vq->access_lock into struct virtio_net to ensure that it
> > can prevent this situation.
>
>
> This patch is a fix, so the Fixes tag would be needed.
>
> But are you really facing this issue, or this is just based on code review?
This issue is run-time checked with AddressSanitizer which can be turned on by:
meson configure -Db_sanitize=address <build_dir>
>
> Currently NUMA reallocation is called whenever
> translate_ring_addresses() is called.
>
> translate_ring_addresses() is primarily called at device initialization, before
> the .new_device() callback is called. At that stage, there is no risk to
> perform a NUMA reallocation as the application is not expected to use APIs
> requiring vq->access_lock acquisition.
>
> But I agree there are possibilities that numa_realloc() gets called while device
> is in running state. But even if that happened, I don't think it is possible that
> numa_realloc() ends-up reallocating the virtqueue on a different NUMA
> node (the vring should not have moved from a physical memory standpoint).
> And even if it happened, we should be safe because we ensure the VQ was
> not ready (so not usable by the
> application) before proceeding with reallocation:
Here is a scenario where VQ ready has not been set:
1. run the testpmd and then start the data plane process.
2. run the front-end.
3. new_device() gets called when the first two queues are ready, even if the later queues are not.
4. when processing messages from the later queues, it may go to numa_realloc(), the ready flag has not been set and therefore can be reallocated.
If all the queues are ready before calling new_device(), this issue does not occur.
I think maybe it is another solution.
Thanks,
Yuan
>
> static struct virtio_net*
> numa_realloc(struct virtio_net *dev, int index) {
> int node, dev_node;
> struct virtio_net *old_dev;
> struct vhost_virtqueue *vq;
> struct batch_copy_elem *bce;
> struct guest_page *gp;
> struct rte_vhost_memory *mem;
> size_t mem_size;
> int ret;
>
> old_dev = dev;
> vq = dev->virtqueue[index];
>
> /*
> * If VQ is ready, it is too late to reallocate, it certainly already
> * happened anyway on VHOST_USER_SET_VRING_ADRR.
> */
> if (vq->ready)
> return dev;
>
> So, if this is fixing a real issue, I would need more details on the issue in order
> to understand why vq->ready was not set when it should have been.
>
> On a side note, while trying to understand how you could face an issue, I
> noticed that translate_ring_addresses() may be called by
> vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as this is
> the handler for VHOST_USER_IOTLB_MSG. We may want to protect
> translate_ring_addresses() calls with locking the VQ locks. I will post a fix for
> it.
>
> > Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> > ---
> > lib/vhost/vhost.c | 26 +++++++++++++-------------
> > lib/vhost/vhost.h | 4 +---
> > lib/vhost/vhost_user.c | 4 ++--
> > lib/vhost/virtio_net.c | 16 ++++++++--------
> > 4 files changed, 24 insertions(+), 26 deletions(-)
> >
>
> ...
>
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> > 7085e0885c..f85ce4fda5 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -185,9 +185,6 @@ struct vhost_virtqueue {
> > bool access_ok;
> > bool ready;
> >
> > - rte_spinlock_t access_lock;
> > -
> > -
> > union {
> > struct vring_used_elem *shadow_used_split;
> > struct vring_used_elem_packed *shadow_used_packed;
> @@ -384,6
> > +381,7 @@ struct virtio_net {
> > int extbuf;
> > int linearbuf;
> > struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> > + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS
> * 2];
>
> The problem here is that you'll be introducing false sharing, so I expect
> performance to no more scale with the number of queues.
>
> It also consumes unnecessary memory.
>
> > struct inflight_mem_info *inflight_info;
> > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
> > char ifname[IF_NAME_SZ];
>
> Thanks,
> Maxime
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] vhost: fix data-plane access to released vq
2022-01-27 10:30 ` Wang, YuanX
@ 2022-01-27 10:46 ` Maxime Coquelin
2022-01-29 9:26 ` Wang, YuanX
0 siblings, 1 reply; 5+ messages in thread
From: Maxime Coquelin @ 2022-01-27 10:46 UTC (permalink / raw)
To: Wang, YuanX, Xia, Chenbo
Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Ling, WeiX
Hi,
On 1/27/22 11:30, Wang, YuanX wrote:
> Hi Maxime,
>
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Wednesday, January 26, 2022 10:03 PM
>> To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo
>> <chenbo.xia@intel.com>
>> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
>> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling,
>> WeiX <weix.ling@intel.com>
>> Subject: Re: [PATCH] vhost: fix data-plane access to released vq
>>
>> Hi Yuan,
>>
>> On 12/3/21 17:34, Yuan Wang wrote:
>>> From: yuan wang <yuanx.wang@intel.com>
>>>
>>> When NUMA reallocation occurs, numa_realloc() on the control plane will
>>> free the old vq. If rte_vhost_dequeue_burst() on the data plane gets
>>> the vq just before release, then it will access the released vq. We
>>> need to put the
>>> vq->access_lock into struct virtio_net to ensure that it
>>> can prevent this situation.
>>
>>
>> This patch is a fix, so the Fixes tag would be needed.
>>
>> But are you really facing this issue, or this is just based on code review?
>
> This issue is run-time checked with AddressSanitizer which can be turned on by:
> meson configure -Db_sanitize=address <build_dir>
>
>>
>> Currently NUMA reallocation is called whenever
>> translate_ring_addresses() is called.
>>
>> translate_ring_addresses() is primarily called at device initialization, before
>> the .new_device() callback is called. At that stage, there is no risk to
>> perform a NUMA reallocation as the application is not expected to use APIs
>> requiring vq->access_lock acquisition.
>>
>> But I agree there are possibilities that numa_realloc() gets called while device
>> is in running state. But even if that happened, I don't think it is possible that
>> numa_realloc() ends-up reallocating the virtqueue on a different NUMA
>> node (the vring should not have moved from a physical memory standpoint).
>> And even if it happened, we should be safe because we ensure the VQ was
>> not ready (so not usable by the
>> application) before proceeding with reallocation:
>
> Here is a scenario where VQ ready has not been set:
> 1. run the testpmd and then start the data plane process.
> 2. run the front-end.
> 3. new_device() gets called when the first two queues are ready, even if the later queues are not.
> 4. when processing messages from the later queues, it may go to numa_realloc(), the ready flag has not been set and therefore can be reallocated.
I will need a bit more details here.
AFAICT, if the ready flag is not set for a given virtqueue, the
virtqueue is not supposed to be exposed to the application. Is there a
case where it happens? If so, the fix should consist in ensuring the
application cannot use the virtqueue if it is not ready.
Regards,
Maxime
>
> If all the queues are ready before calling new_device(), this issue does not occur.
> I think maybe it is another solution.
No, that was the older behaviour but causes issues with vDPA.
We cannot just revert to older behaviour.
Thanks,
Maxime
> Thanks,
> Yuan
>
>>
>> static struct virtio_net*
>> numa_realloc(struct virtio_net *dev, int index) {
>> int node, dev_node;
>> struct virtio_net *old_dev;
>> struct vhost_virtqueue *vq;
>> struct batch_copy_elem *bce;
>> struct guest_page *gp;
>> struct rte_vhost_memory *mem;
>> size_t mem_size;
>> int ret;
>>
>> old_dev = dev;
>> vq = dev->virtqueue[index];
>>
>> /*
>> * If VQ is ready, it is too late to reallocate, it certainly already
>> * happened anyway on VHOST_USER_SET_VRING_ADRR.
>> */
>> if (vq->ready)
>> return dev;
>>
>> So, if this is fixing a real issue, I would need more details on the issue in order
>> to understand why vq->ready was not set when it should have been.
>>
>> On a side note, while trying to understand how you could face an issue, I
>> noticed that translate_ring_addresses() may be called by
>> vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as this is
>> the handler for VHOST_USER_IOTLB_MSG. We may want to protect
>> translate_ring_addresses() calls with locking the VQ locks. I will post a fix for
>> it.
>>
>>> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
>>> ---
>>> lib/vhost/vhost.c | 26 +++++++++++++-------------
>>> lib/vhost/vhost.h | 4 +---
>>> lib/vhost/vhost_user.c | 4 ++--
>>> lib/vhost/virtio_net.c | 16 ++++++++--------
>>> 4 files changed, 24 insertions(+), 26 deletions(-)
>>>
>>
>> ...
>>
>>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
>>> 7085e0885c..f85ce4fda5 100644
>>> --- a/lib/vhost/vhost.h
>>> +++ b/lib/vhost/vhost.h
>>> @@ -185,9 +185,6 @@ struct vhost_virtqueue {
>>> bool access_ok;
>>> bool ready;
>>>
>>> - rte_spinlock_t access_lock;
>>> -
>>> -
>>> union {
>>> struct vring_used_elem *shadow_used_split;
>>> struct vring_used_elem_packed *shadow_used_packed;
>> @@ -384,6
>>> +381,7 @@ struct virtio_net {
>>> int extbuf;
>>> int linearbuf;
>>> struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
>>> + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS
>> * 2];
>>
>> The problem here is that you'll be introducing false sharing, so I expect
>> performance to no more scale with the number of queues.
>>
>> It also consumes unnecessary memory.
>>
>>> struct inflight_mem_info *inflight_info;
>>> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>>> char ifname[IF_NAME_SZ];
>>
>> Thanks,
>> Maxime
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] vhost: fix data-plane access to released vq
2022-01-27 10:46 ` Maxime Coquelin
@ 2022-01-29 9:26 ` Wang, YuanX
0 siblings, 0 replies; 5+ messages in thread
From: Wang, YuanX @ 2022-01-29 9:26 UTC (permalink / raw)
To: Maxime Coquelin, Xia, Chenbo
Cc: dev, Hu, Jiayu, Ding, Xuan, Ma, WenwuX, Ling, WeiX
Hi Maxime,
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, January 27, 2022 6:47 PM
> To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling,
> WeiX <weix.ling@intel.com>
> Subject: Re: [PATCH] vhost: fix data-plane access to released vq
>
> Hi,
>
> On 1/27/22 11:30, Wang, YuanX wrote:
> > Hi Maxime,
> >
> >> -----Original Message-----
> >> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> Sent: Wednesday, January 26, 2022 10:03 PM
> >> To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo
> >> <chenbo.xia@intel.com>
> >> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan
> >> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling,
> WeiX
> >> <weix.ling@intel.com>
> >> Subject: Re: [PATCH] vhost: fix data-plane access to released vq
> >>
> >> Hi Yuan,
> >>
> >> On 12/3/21 17:34, Yuan Wang wrote:
> >>> From: yuan wang <yuanx.wang@intel.com>
> >>>
> >>> When NUMA reallocation occurs, numa_realloc() on the control plane
> >>> will free the old vq. If rte_vhost_dequeue_burst() on the data plane
> >>> gets the vq just before release, then it will access the released vq.
> >>> We need to put the
> >>> vq->access_lock into struct virtio_net to ensure that it
> >>> can prevent this situation.
> >>
> >>
> >> This patch is a fix, so the Fixes tag would be needed.
> >>
> >> But are you really facing this issue, or this is just based on code review?
> >
> > This issue is run-time checked with AddressSanitizer which can be turned
> on by:
> > meson configure -Db_sanitize=address <build_dir>
> >
> >>
> >> Currently NUMA reallocation is called whenever
> >> translate_ring_addresses() is called.
> >>
> >> translate_ring_addresses() is primarily called at device
> >> initialization, before the .new_device() callback is called. At that
> >> stage, there is no risk to perform a NUMA reallocation as the
> >> application is not expected to use APIs requiring vq->access_lock
> acquisition.
> >>
> >> But I agree there are possibilities that numa_realloc() gets called
> >> while device is in running state. But even if that happened, I don't
> >> think it is possible that
> >> numa_realloc() ends-up reallocating the virtqueue on a different NUMA
> >> node (the vring should not have moved from a physical memory
> standpoint).
> >> And even if it happened, we should be safe because we ensure the VQ
> >> was not ready (so not usable by the
> >> application) before proceeding with reallocation:
> >
> > Here is a scenario where VQ ready has not been set:
> > 1. run the testpmd and then start the data plane process.
> > 2. run the front-end.
> > 3. new_device() gets called when the first two queues are ready, even if
> the later queues are not.
> > 4. when processing messages from the later queues, it may go to
> numa_realloc(), the ready flag has not been set and therefore can be
> reallocated.
>
> I will need a bit more details here.
For this scenario I used a QEMU as the front end and set up 8 queues with the front and back ends in different NUMA.
>
> AFAICT, if the ready flag is not set for a given virtqueue, the virtqueue is not
> supposed to be exposed to the application. Is there a case where it happens?
> If so, the fix should consist in ensuring the application cannot use the
> virtqueue if it is not ready.
>
> Regards,
> Maxime
Thanks for the suggestion, I will look for more details on this.
Regards,
Yuan
>
> >
> > If all the queues are ready before calling new_device(), this issue does not
> occur.
> > I think maybe it is another solution.
>
> No, that was the older behaviour but causes issues with vDPA.
> We cannot just revert to older behaviour.
>
> Thanks,
> Maxime
>
> > Thanks,
> > Yuan
> >
> >>
> >> static struct virtio_net*
> >> numa_realloc(struct virtio_net *dev, int index) {
> >> int node, dev_node;
> >> struct virtio_net *old_dev;
> >> struct vhost_virtqueue *vq;
> >> struct batch_copy_elem *bce;
> >> struct guest_page *gp;
> >> struct rte_vhost_memory *mem;
> >> size_t mem_size;
> >> int ret;
> >>
> >> old_dev = dev;
> >> vq = dev->virtqueue[index];
> >>
> >> /*
> >> * If VQ is ready, it is too late to reallocate, it certainly already
> >> * happened anyway on VHOST_USER_SET_VRING_ADRR.
> >> */
> >> if (vq->ready)
> >> return dev;
> >>
> >> So, if this is fixing a real issue, I would need more details on the
> >> issue in order to understand why vq->ready was not set when it should
> have been.
> >>
> >> On a side note, while trying to understand how you could face an
> >> issue, I noticed that translate_ring_addresses() may be called by
> >> vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as
> >> this is the handler for VHOST_USER_IOTLB_MSG. We may want to protect
> >> translate_ring_addresses() calls with locking the VQ locks. I will
> >> post a fix for it.
> >>
> >>> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> >>> ---
> >>> lib/vhost/vhost.c | 26 +++++++++++++-------------
> >>> lib/vhost/vhost.h | 4 +---
> >>> lib/vhost/vhost_user.c | 4 ++--
> >>> lib/vhost/virtio_net.c | 16 ++++++++--------
> >>> 4 files changed, 24 insertions(+), 26 deletions(-)
> >>>
> >>
> >> ...
> >>
> >>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> >>> 7085e0885c..f85ce4fda5 100644
> >>> --- a/lib/vhost/vhost.h
> >>> +++ b/lib/vhost/vhost.h
> >>> @@ -185,9 +185,6 @@ struct vhost_virtqueue {
> >>> bool access_ok;
> >>> bool ready;
> >>>
> >>> - rte_spinlock_t access_lock;
> >>> -
> >>> -
> >>> union {
> >>> struct vring_used_elem *shadow_used_split;
> >>> struct vring_used_elem_packed *shadow_used_packed;
> >> @@ -384,6
> >>> +381,7 @@ struct virtio_net {
> >>> int extbuf;
> >>> int linearbuf;
> >>> struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> >>> + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS
> >> * 2];
> >>
> >> The problem here is that you'll be introducing false sharing, so I
> >> expect performance to no more scale with the number of queues.
> >>
> >> It also consumes unnecessary memory.
> >>
> >>> struct inflight_mem_info *inflight_info;
> >>> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX :
> IFNAMSIZ)
> >>> char ifname[IF_NAME_SZ];
> >>
> >> Thanks,
> >> Maxime
> >
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-01-29 9:26 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-03 16:34 [PATCH] vhost: fix data-plane access to released vq Yuan Wang
2022-01-26 14:02 ` Maxime Coquelin
2022-01-27 10:30 ` Wang, YuanX
2022-01-27 10:46 ` Maxime Coquelin
2022-01-29 9:26 ` Wang, YuanX
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).