From: yuan wang <yuanx.wang@intel.com> When numa reallocation occurs, numa_realoc() on the control plane will free the old vq. If rte_vhost_dequeue_burst() on the data plane get the vq just before release, then it will access the released vq. We need to put the vq->access_lock into struct virtio_net to ensure that it can prevents this situation. Signed-off-by: Yuan Wang <yuanx.wang@intel.com> --- lib/vhost/vhost.c | 26 +++++++++++++------------- lib/vhost/vhost.h | 4 +--- lib/vhost/vhost_user.c | 4 ++-- lib/vhost/virtio_net.c | 16 ++++++++-------- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 13a9bb9dd1..4259931be9 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -627,7 +627,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) dev->virtqueue[i] = vq; init_vring_queue(dev, i); - rte_spinlock_init(&vq->access_lock); + rte_spinlock_init(&dev->vq_access_lock[i]); vq->avail_wrap_counter = 1; vq->used_wrap_counter = 1; vq->signalled_used_valid = false; @@ -1325,7 +1325,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) if (!vq) return 0; - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[queue_id]); if (unlikely(!vq->enabled || vq->avail == NULL)) goto out; @@ -1333,7 +1333,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; out: - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return ret; } @@ -1417,12 +1417,12 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) if (!vq) return -1; - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[queue_id]); vq->notif_enable = enable; ret = vhost_enable_guest_notification(dev, vq, enable); - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return ret; } @@ -1479,7 +1479,7 @@ rte_vhost_rx_queue_count(int 
vid, uint16_t qid) if (vq == NULL) return 0; - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[qid]); if (unlikely(!vq->enabled || vq->avail == NULL)) goto out; @@ -1487,7 +1487,7 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid) ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; out: - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[qid]); return ret; } @@ -1721,9 +1721,9 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id, ops->transfer_data == NULL)) return -1; - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[queue_id]); ret = async_channel_register(vid, queue_id, ops); - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return ret; } @@ -1784,7 +1784,7 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id) if (!vq->async) return ret; - if (!rte_spinlock_trylock(&vq->access_lock)) { + if (!rte_spinlock_trylock(&dev->vq_access_lock[queue_id])) { VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. " "virt queue busy.\n"); return -1; @@ -1799,7 +1799,7 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id) vhost_free_async_mem(vq); out: - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return ret; } @@ -1856,14 +1856,14 @@ rte_vhost_async_get_inflight(int vid, uint16_t queue_id) if (!vq->async) return ret; - if (!rte_spinlock_trylock(&vq->access_lock)) { + if (!rte_spinlock_trylock(&dev->vq_access_lock[queue_id])) { VHOST_LOG_CONFIG(DEBUG, "Failed to check in-flight packets. 
" "virt queue busy.\n"); return ret; } ret = vq->async->pkts_inflight_n; - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return ret; } diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 7085e0885c..f85ce4fda5 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -185,9 +185,6 @@ struct vhost_virtqueue { bool access_ok; bool ready; - rte_spinlock_t access_lock; - - union { struct vring_used_elem *shadow_used_split; struct vring_used_elem_packed *shadow_used_packed; @@ -384,6 +381,7 @@ struct virtio_net { int extbuf; int linearbuf; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS * 2]; struct inflight_mem_info *inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index a781346c4d..305b4059bb 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2899,7 +2899,7 @@ vhost_user_lock_all_queue_pairs(struct virtio_net *dev) struct vhost_virtqueue *vq = dev->virtqueue[i]; if (vq) { - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[i]); vq_num++; } i++; @@ -2916,7 +2916,7 @@ vhost_user_unlock_all_queue_pairs(struct virtio_net *dev) struct vhost_virtqueue *vq = dev->virtqueue[i]; if (vq) { - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[i]); vq_num++; } i++; diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index b3d954aab4..c5a05292ab 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -1354,7 +1354,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[queue_id]); if (unlikely(!vq->enabled)) goto out_access_unlock; @@ -1380,7 +1380,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 
vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return nb_tx; } @@ -1906,11 +1906,11 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, return 0; } - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[queue_id]); n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count); - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return n_pkts_cpl; } @@ -1962,7 +1962,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); + rte_spinlock_lock(&dev->vq_access_lock[queue_id]); if (unlikely(!vq->enabled || !vq->async)) goto out_access_unlock; @@ -1990,7 +1990,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); return nb_tx; } @@ -2900,7 +2900,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + if (unlikely(rte_spinlock_trylock(&dev->vq_access_lock[queue_id]) == 0)) return 0; if (unlikely(!vq->enabled)) { @@ -2969,7 +2969,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_spinlock_unlock(&dev->vq_access_lock[queue_id]); if (unlikely(rarp_mbuf != NULL)) count += 1; -- 2.25.1
Hi Yuan, On 12/3/21 17:34, Yuan Wang wrote: > From: yuan wang <yuanx.wang@intel.com> > > When numa reallocation occurs, numa_realoc() on the control > plane will free the old vq. If rte_vhost_dequeue_burst() > on the data plane get the vq just before release, then it > will access the released vq. We need to put the > vq->access_lock into struct virtio_net to ensure that it > can prevents this situation. This patch is a fix, so the Fixes tag would be needed. But are you really facing this issue, or this is just based on code review? Currently NUMA reallocation is called whenever translate_ring_addresses() is called. translate_ring_addresses() is primarly called at device initialization, before the .new_device() callback is called. At that stage, there is no risk to performa NUMA reallocation as the application is not expected to use APIs requiring vq->access_lock acquisition. But I agree there are possibilities that numa_realloc() gets called while device is in running state. But even if that happened, I don't think it is possible that numa_realloc() ends-up reallocating the virtqueue on a different NUMA node (the vring should not have moved from a physical memory standpoint). And if even it happened, we should be safe because we ensure the VQ was not ready (so not usable by the application) before proceeding with reallocation: static struct virtio_net* numa_realloc(struct virtio_net *dev, int index) { int node, dev_node; struct virtio_net *old_dev; struct vhost_virtqueue *vq; struct batch_copy_elem *bce; struct guest_page *gp; struct rte_vhost_memory *mem; size_t mem_size; int ret; old_dev = dev; vq = dev->virtqueue[index]; /* * If VQ is ready, it is too late to reallocate, it certainly already * happened anyway on VHOST_USER_SET_VRING_ADRR. */ if (vq->ready) return dev; So, if this is fixing a real issue, I would need more details on the issue in order to understand why vq->ready was not set when it should have been. 
On a side note, while trying to understand how you could face an issue, I noticed that translate_ring_addresses() may be called by vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as this is the handler for VHOST_USER_IOTLB_MSG. We may want to protect translate_ring_addresses() calls with locking the VQ locks. I will post a fix for it. > Signed-off-by: Yuan Wang <yuanx.wang@intel.com> > --- > lib/vhost/vhost.c | 26 +++++++++++++------------- > lib/vhost/vhost.h | 4 +--- > lib/vhost/vhost_user.c | 4 ++-- > lib/vhost/virtio_net.c | 16 ++++++++-------- > 4 files changed, 24 insertions(+), 26 deletions(-) > ... > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h > index 7085e0885c..f85ce4fda5 100644 > --- a/lib/vhost/vhost.h > +++ b/lib/vhost/vhost.h > @@ -185,9 +185,6 @@ struct vhost_virtqueue { > bool access_ok; > bool ready; > > - rte_spinlock_t access_lock; > - > - > union { > struct vring_used_elem *shadow_used_split; > struct vring_used_elem_packed *shadow_used_packed; > @@ -384,6 +381,7 @@ struct virtio_net { > int extbuf; > int linearbuf; > struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; > + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS * 2]; The problem here is that you'll be introducing false sharing, so I expect performance to no more scale with the number of queues. It also consumes unnecessary memory. > struct inflight_mem_info *inflight_info; > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > char ifname[IF_NAME_SZ]; Thanks, Maxime
Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coquelin@redhat.com> > Sent: Wednesday, January 26, 2022 10:03 PM > To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo > <chenbo.xia@intel.com> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan > <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling, > WeiX <weix.ling@intel.com> > Subject: Re: [PATCH] vhost: fix data-plane access to released vq > > Hi Yuan, > > On 12/3/21 17:34, Yuan Wang wrote: > > From: yuan wang <yuanx.wang@intel.com> > > > > When numa reallocation occurs, numa_realoc() on the control plane will > > free the old vq. If rte_vhost_dequeue_burst() on the data plane get > > the vq just before release, then it will access the released vq. We > > need to put the > > vq->access_lock into struct virtio_net to ensure that it > > can prevents this situation. > > > This patch is a fix, so the Fixes tag would be needed. > > But are you really facing this issue, or this is just based on code review? This issue is run-time checked with AddressSanitizer which can be turned on by: meson configure -Db_sanitize=address <build_dir> > > Currently NUMA reallocation is called whenever > translate_ring_addresses() is called. > > translate_ring_addresses() is primarly called at device initialization, before > the .new_device() callback is called. At that stage, there is no risk to > performa NUMA reallocation as the application is not expected to use APIs > requiring vq->access_lock acquisition. > > But I agree there are possibilities that numa_realloc() gets called while device > is in running state. But even if that happened, I don't think it is possible that > numa_realloc() ends-up reallocating the virtqueue on a different NUMA > node (the vring should not have moved from a physical memory standpoint). 
> And if even it happened, we should be safe because we ensure the VQ was > not ready (so not usable by the > application) before proceeding with reallocation: Here is a scenario where VQ ready has not been set: 1. run the testpmd and then start the data plane process. 2. run the front-end. 3. new_device() gets called when the first two queues are ready, even if the later queues are not. 4. when processing messages from the later queues, it may go to numa_realloc(); the ready flag has not been set there, so the vq can be reallocated. If all the queues are ready before calling new_device(), this issue does not occur. I think that may be another possible solution. Thanks, Yuan > > static struct virtio_net* > numa_realloc(struct virtio_net *dev, int index) { > int node, dev_node; > struct virtio_net *old_dev; > struct vhost_virtqueue *vq; > struct batch_copy_elem *bce; > struct guest_page *gp; > struct rte_vhost_memory *mem; > size_t mem_size; > int ret; > > old_dev = dev; > vq = dev->virtqueue[index]; > > /* > * If VQ is ready, it is too late to reallocate, it certainly already > * happened anyway on VHOST_USER_SET_VRING_ADRR. > */ > if (vq->ready) > return dev; > > So, if this is fixing a real issue, I would need more details on the issue in order > to understand why vq->ready was not set when it should have been. > > On a side note, while trying to understand how you could face an issue, I > noticed that translate_ring_addresses() may be called by > vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as this is > the handler for VHOST_USER_IOTLB_MSG. We may want to protect > translate_ring_addresses() calls with locking the VQ locks. I will post a fix for > it. > > > Signed-off-by: Yuan Wang <yuanx.wang@intel.com> > > --- > lib/vhost/vhost.c | 26 +++++++++++++------------- > lib/vhost/vhost.h | 4 +--- > lib/vhost/vhost_user.c | 4 ++-- > lib/vhost/virtio_net.c | 16 ++++++++-------- > 4 files changed, 24 insertions(+), 26 deletions(-) > > > > ... 
> > > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index > > 7085e0885c..f85ce4fda5 100644 > > --- a/lib/vhost/vhost.h > > +++ b/lib/vhost/vhost.h > > @@ -185,9 +185,6 @@ struct vhost_virtqueue { > > bool access_ok; > > bool ready; > > > > - rte_spinlock_t access_lock; > > - > > - > > union { > > struct vring_used_elem *shadow_used_split; > > struct vring_used_elem_packed *shadow_used_packed; > @@ -384,6 > > +381,7 @@ struct virtio_net { > > int extbuf; > > int linearbuf; > > struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; > > + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS > * 2]; > > The problem here is that you'll be introducing false sharing, so I expect > performance to no more scale with the number of queues. > > It also consumes unnecessary memory. > > > struct inflight_mem_info *inflight_info; > > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > > char ifname[IF_NAME_SZ]; > > Thanks, > Maxime
Hi, On 1/27/22 11:30, Wang, YuanX wrote: > Hi Maxime, > >> -----Original Message----- >> From: Maxime Coquelin <maxime.coquelin@redhat.com> >> Sent: Wednesday, January 26, 2022 10:03 PM >> To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo >> <chenbo.xia@intel.com> >> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan >> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling, >> WeiX <weix.ling@intel.com> >> Subject: Re: [PATCH] vhost: fix data-plane access to released vq >> >> Hi Yuan, >> >> On 12/3/21 17:34, Yuan Wang wrote: >>> From: yuan wang <yuanx.wang@intel.com> >>> >>> When numa reallocation occurs, numa_realoc() on the control plane will >>> free the old vq. If rte_vhost_dequeue_burst() on the data plane get >>> the vq just before release, then it will access the released vq. We >>> need to put the >>> vq->access_lock into struct virtio_net to ensure that it >>> can prevents this situation. >> >> >> This patch is a fix, so the Fixes tag would be needed. >> >> But are you really facing this issue, or this is just based on code review? > > This issue is run-time checked with AddressSanitizer which can be turned on by: > meson configure -Db_sanitize=address <build_dir> > >> >> Currently NUMA reallocation is called whenever >> translate_ring_addresses() is called. >> >> translate_ring_addresses() is primarly called at device initialization, before >> the .new_device() callback is called. At that stage, there is no risk to >> performa NUMA reallocation as the application is not expected to use APIs >> requiring vq->access_lock acquisition. >> >> But I agree there are possibilities that numa_realloc() gets called while device >> is in running state. But even if that happened, I don't think it is possible that >> numa_realloc() ends-up reallocating the virtqueue on a different NUMA >> node (the vring should not have moved from a physical memory standpoint). 
>> And if even it happened, we should be safe because we ensure the VQ was >> not ready (so not usable by the >> application) before proceeding with reallocation: > > Here is a scenario where VQ ready has not been set: > 1. run the testpmd and then start the data plane process. > 2. run the front-end. > 3. new_device() gets called when the first two queues are ready, even if the later queues are not. > 4. when processing messages from the later queues, it may go to numa_realloc(), the ready flag has not been set and therefore can be reallocated. I will need a bit more details here. AFAICT, if the ready flag is not set for a given virtqueue, the virtqueue is not supposed to be exposed to the application. Is there a case where it happens? If so, the fix should consist in ensuring the application cannot use the virtqueue if it is not ready. Regards, Maxime > > If all the queues are ready before call new_deivce(), this issue does not occur. > I think maybe it is another solution. No, that was the older behaviour but causes issues with vDPA. We cannot just revert to older behaviour. Thanks, Maxime > Thanks, > Yuan > >> >> static struct virtio_net* >> numa_realloc(struct virtio_net *dev, int index) { >> int node, dev_node; >> struct virtio_net *old_dev; >> struct vhost_virtqueue *vq; >> struct batch_copy_elem *bce; >> struct guest_page *gp; >> struct rte_vhost_memory *mem; >> size_t mem_size; >> int ret; >> >> old_dev = dev; >> vq = dev->virtqueue[index]; >> >> /* >> * If VQ is ready, it is too late to reallocate, it certainly already >> * happened anyway on VHOST_USER_SET_VRING_ADRR. >> */ >> if (vq->ready) >> return dev; >> >> So, if this is fixing a real issue, I would need more details on the issue in order >> to understand why vq->ready was not set when it should have been. >> >> On a side note, while trying to understand how you could face an issue, I >> noticed that translate_ring_addresses() may be called by >> vhost_user_iotlb_msg(). 
In that case, vq->access_lock is not held as this is >> the handler for VHOST_USER_IOTLB_MSG. We may want to protect >> translate_ring_addresses() calls with locking the VQ locks. I will post a fix for >> it. >> >>> Signed-off-by: Yuan Wang <yuanx.wang@intel.com> >>> --- >>> lib/vhost/vhost.c | 26 +++++++++++++------------- >>> lib/vhost/vhost.h | 4 +--- >>> lib/vhost/vhost_user.c | 4 ++-- >>> lib/vhost/virtio_net.c | 16 ++++++++-------- >>> 4 files changed, 24 insertions(+), 26 deletions(-) >>> >> >> ... >> >>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index >>> 7085e0885c..f85ce4fda5 100644 >>> --- a/lib/vhost/vhost.h >>> +++ b/lib/vhost/vhost.h >>> @@ -185,9 +185,6 @@ struct vhost_virtqueue { >>> bool access_ok; >>> bool ready; >>> >>> - rte_spinlock_t access_lock; >>> - >>> - >>> union { >>> struct vring_used_elem *shadow_used_split; >>> struct vring_used_elem_packed *shadow_used_packed; >> @@ -384,6 >>> +381,7 @@ struct virtio_net { >>> int extbuf; >>> int linearbuf; >>> struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; >>> + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS >> * 2]; >> >> The problem here is that you'll be introducing false sharing, so I expect >> performance to no more scale with the number of queues. >> >> It also consumes unnecessary memory. >> >>> struct inflight_mem_info *inflight_info; >>> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) >>> char ifname[IF_NAME_SZ]; >> >> Thanks, >> Maxime >
Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coquelin@redhat.com> > Sent: Thursday, January 27, 2022 6:47 PM > To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo > <chenbo.xia@intel.com> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan > <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling, > WeiX <weix.ling@intel.com> > Subject: Re: [PATCH] vhost: fix data-plane access to released vq > > Hi, > > On 1/27/22 11:30, Wang, YuanX wrote: > > Hi Maxime, > > > >> -----Original Message----- > >> From: Maxime Coquelin <maxime.coquelin@redhat.com> > >> Sent: Wednesday, January 26, 2022 10:03 PM > >> To: Wang, YuanX <yuanx.wang@intel.com>; Xia, Chenbo > >> <chenbo.xia@intel.com> > >> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan > >> <xuan.ding@intel.com>; Ma, WenwuX <wenwux.ma@intel.com>; Ling, > WeiX > >> <weix.ling@intel.com> > >> Subject: Re: [PATCH] vhost: fix data-plane access to released vq > >> > >> Hi Yuan, > >> > >> On 12/3/21 17:34, Yuan Wang wrote: > >>> From: yuan wang <yuanx.wang@intel.com> > >>> > >>> When numa reallocation occurs, numa_realoc() on the control plane > >>> will free the old vq. If rte_vhost_dequeue_burst() on the data plane > >>> get the vq just before release, then it will access the released vq. > >>> We need to put the > >>> vq->access_lock into struct virtio_net to ensure that it > >>> can prevents this situation. > >> > >> > >> This patch is a fix, so the Fixes tag would be needed. > >> > >> But are you really facing this issue, or this is just based on code review? > > > > This issue is run-time checked with AddressSanitizer which can be turned > on by: > > meson configure -Db_sanitize=address <build_dir> > > > >> > >> Currently NUMA reallocation is called whenever > >> translate_ring_addresses() is called. > >> > >> translate_ring_addresses() is primarly called at device > >> initialization, before the .new_device() callback is called. 
At that > >> stage, there is no risk to performa NUMA reallocation as the > >> application is not expected to use APIs requiring vq->access_lock > acquisition. > >> > >> But I agree there are possibilities that numa_realloc() gets called > >> while device is in running state. But even if that happened, I don't > >> think it is possible that > >> numa_realloc() ends-up reallocating the virtqueue on a different NUMA > >> node (the vring should not have moved from a physical memory > standpoint). > >> And if even it happened, we should be safe because we ensure the VQ > >> was not ready (so not usable by the > >> application) before proceeding with reallocation: > > > > Here is a scenario where VQ ready has not been set: > > 1. run the testpmd and then start the data plane process. > > 2. run the front-end. > > 3. new_device() gets called when the first two queues are ready, even if > the later queues are not. > > 4. when processing messages from the later queues, it may go to > numa_realloc(), the ready flag has not been set and therefore can be > reallocated. > > I will need a bit more details here. For this scenario I used a QEMU as the front end and set up 8 queues with the front and back ends in different NUMA. > > AFAICT, if the ready flag is not set for a given virtqueue, the virtqueue is not > supposed to be exposed to the application. Is there a case where it happens? > If so, the fix should consist in ensuring the application cannot use the > virtqueue if it is not ready. > > Regards, > Maxime Thanks for the suggestion, I will look for more details on this. Regards, Yuan > > > > > If all the queues are ready before call new_deivce(), this issue does not > occur. > > I think maybe it is another solution. > > No, that was the older behaviour but causes issues with vDPA. > We cannot just revert to older behaviour. 
> > Thanks, > Maxime > > > Thanks, > > Yuan > > > >> > >> static struct virtio_net* > >> numa_realloc(struct virtio_net *dev, int index) { > >> int node, dev_node; > >> struct virtio_net *old_dev; > >> struct vhost_virtqueue *vq; > >> struct batch_copy_elem *bce; > >> struct guest_page *gp; > >> struct rte_vhost_memory *mem; > >> size_t mem_size; > >> int ret; > >> > >> old_dev = dev; > >> vq = dev->virtqueue[index]; > >> > >> /* > >> * If VQ is ready, it is too late to reallocate, it certainly already > >> * happened anyway on VHOST_USER_SET_VRING_ADRR. > >> */ > >> if (vq->ready) > >> return dev; > >> > >> So, if this is fixing a real issue, I would need more details on the > >> issue in order to understand why vq->ready was not set when it should > have been. > >> > >> On a side note, while trying to understand how you could face an > >> issue, I noticed that translate_ring_addresses() may be called by > >> vhost_user_iotlb_msg(). In that case, vq->access_lock is not held as > >> this is the handler for VHOST_USER_IOTLB_MSG. We may want to protect > >> translate_ring_addresses() calls with locking the VQ locks. I will > >> post a fix for it. > >> > >>> Signed-off-by: Yuan Wang <yuanx.wang@intel.com> > >>> --- > >>> lib/vhost/vhost.c | 26 +++++++++++++------------- > >>> lib/vhost/vhost.h | 4 +--- > >>> lib/vhost/vhost_user.c | 4 ++-- > >>> lib/vhost/virtio_net.c | 16 ++++++++-------- > >>> 4 files changed, 24 insertions(+), 26 deletions(-) > >>> > >> > >> ... 
> >> > >>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index > >>> 7085e0885c..f85ce4fda5 100644 > >>> --- a/lib/vhost/vhost.h > >>> +++ b/lib/vhost/vhost.h > >>> @@ -185,9 +185,6 @@ struct vhost_virtqueue { > >>> bool access_ok; > >>> bool ready; > >>> > >>> - rte_spinlock_t access_lock; > >>> - > >>> - > >>> union { > >>> struct vring_used_elem *shadow_used_split; > >>> struct vring_used_elem_packed *shadow_used_packed; > >> @@ -384,6 > >>> +381,7 @@ struct virtio_net { > >>> int extbuf; > >>> int linearbuf; > >>> struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; > >>> + rte_spinlock_t vq_access_lock[VHOST_MAX_QUEUE_PAIRS > >> * 2]; > >> > >> The problem here is that you'll be introducing false sharing, so I > >> expect performance to no more scale with the number of queues. > >> > >> It also consumes unnecessary memory. > >> > >>> struct inflight_mem_info *inflight_info; > >>> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : > IFNAMSIZ) > >>> char ifname[IF_NAME_SZ]; > >> > >> Thanks, > >> Maxime > >