Defer shadow ring update will help overall throughput when frontend much slower than backend. But that is not all the cases we faced now. In case like ovs-dpdk + dpdk virtio user, frontend will much faster than backend. Frontend may not be able to collect available descs when shadow update is deferred. Thus will harm RFC2544 performance. Solution is just remove deferred shadow update, which will help RFC2544 and fix potential issue with virtio net driver. Signed-off-by: Marvin Liu <yong.liu@intel.com> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 37c47c7dc..2ba0575a7 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -382,25 +382,6 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev, } } -static __rte_always_inline void -vhost_flush_dequeue_packed(struct virtio_net *dev, - struct vhost_virtqueue *vq) -{ - int shadow_count; - if (!vq->shadow_used_idx) - return; - - shadow_count = vq->last_used_idx - vq->shadow_last_used_idx; - if (shadow_count <= 0) - shadow_count += vq->size; - - if ((uint32_t)shadow_count >= (vq->size - MAX_PKT_BURST)) { - do_data_copy_dequeue(vq); - vhost_flush_dequeue_shadow_packed(dev, vq); - vhost_vring_call_packed(dev, vq); - } -} - /* avoid write operation when necessary, to lessen cache issues */ #define ASSIGN_UNLESS_EQUAL(var, val) do { \ if ((var) != (val)) \ @@ -2133,20 +2114,6 @@ virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, return pkt_idx; } -static __rte_always_inline bool -next_desc_is_avail(const struct vhost_virtqueue *vq) -{ - bool wrap_counter = vq->avail_wrap_counter; - uint16_t next_used_idx = vq->last_used_idx + 1; - - if (next_used_idx >= vq->size) { - next_used_idx -= vq->size; - wrap_counter ^= 1; - } - - return desc_is_avail(&vq->desc_packed[next_used_idx], wrap_counter); -} - static __rte_noinline uint16_t virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -2163,7 +2130,6 @@ virtio_dev_tx_packed(struct virtio_net 
*dev, if (remained >= PACKED_BATCH_SIZE) { if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool, &pkts[pkt_idx])) { - vhost_flush_dequeue_packed(dev, vq); pkt_idx += PACKED_BATCH_SIZE; remained -= PACKED_BATCH_SIZE; continue; @@ -2173,7 +2139,6 @@ virtio_dev_tx_packed(struct virtio_net *dev, if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, &pkts[pkt_idx])) break; - vhost_flush_dequeue_packed(dev, vq); pkt_idx++; remained--; @@ -2182,15 +2147,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, if (vq->shadow_used_idx) { do_data_copy_dequeue(vq); - if (remained && !next_desc_is_avail(vq)) { - /* - * The guest may be waiting to TX some buffers to - * enqueue more to avoid bufferfloat, so we try to - * reduce latency here. - */ - vhost_flush_dequeue_shadow_packed(dev, vq); - vhost_vring_call_packed(dev, vq); - } + vhost_flush_dequeue_shadow_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } return pkt_idx; -- 2.17.1
Tested-by: Wang, Yinan <yinan.wang@intel.com>
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> Sent: 2020年4月2日 5:29
> To: maxime.coquelin@redhat.com; Ye, Xiaolong <xiaolong.ye@intel.com>;
> Wang, Zhihong <zhihong.wang@intel.com>; eperezma@redhat.com
> Cc: dev@dpdk.org; Liu, Yong <yong.liu@intel.com>
> Subject: [dpdk-dev] [PATCH] vhost: remove deferred shadow update
>
> Deferring the shadow ring update helps overall throughput when the frontend
> is much slower than the backend, but that does not cover all the cases we
> face now. In cases like ovs-dpdk + dpdk virtio-user, the frontend is much
> faster than the backend. The frontend may not be able to collect available
> descs when the shadow update is deferred, which harms RFC2544 performance.
>
> The solution is simply to remove the deferred shadow update, which helps
> RFC2544 performance and fixes a potential issue with the virtio-net driver.
>
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index
> 37c47c7dc..2ba0575a7 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -382,25 +382,6 @@ vhost_shadow_enqueue_single_packed(struct
> virtio_net *dev,
> }
> }
>
> -static __rte_always_inline void
> -vhost_flush_dequeue_packed(struct virtio_net *dev,
> - struct vhost_virtqueue *vq)
> -{
> - int shadow_count;
> - if (!vq->shadow_used_idx)
> - return;
> -
> - shadow_count = vq->last_used_idx - vq->shadow_last_used_idx;
> - if (shadow_count <= 0)
> - shadow_count += vq->size;
> -
> - if ((uint32_t)shadow_count >= (vq->size - MAX_PKT_BURST)) {
> - do_data_copy_dequeue(vq);
> - vhost_flush_dequeue_shadow_packed(dev, vq);
> - vhost_vring_call_packed(dev, vq);
> - }
> -}
> -
> /* avoid write operation when necessary, to lessen cache issues */
> #define ASSIGN_UNLESS_EQUAL(var, val) do { \
> if ((var) != (val)) \
> @@ -2133,20 +2114,6 @@ virtio_dev_tx_packed_zmbuf(struct virtio_net
> *dev,
> return pkt_idx;
> }
>
> -static __rte_always_inline bool
> -next_desc_is_avail(const struct vhost_virtqueue *vq) -{
> - bool wrap_counter = vq->avail_wrap_counter;
> - uint16_t next_used_idx = vq->last_used_idx + 1;
> -
> - if (next_used_idx >= vq->size) {
> - next_used_idx -= vq->size;
> - wrap_counter ^= 1;
> - }
> -
> - return desc_is_avail(&vq->desc_packed[next_used_idx],
> wrap_counter);
> -}
> -
> static __rte_noinline uint16_t
> virtio_dev_tx_packed(struct virtio_net *dev,
> struct vhost_virtqueue *vq,
> @@ -2163,7 +2130,6 @@ virtio_dev_tx_packed(struct virtio_net *dev,
> if (remained >= PACKED_BATCH_SIZE) {
> if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,
> &pkts[pkt_idx])) {
> - vhost_flush_dequeue_packed(dev, vq);
> pkt_idx += PACKED_BATCH_SIZE;
> remained -= PACKED_BATCH_SIZE;
> continue;
> @@ -2173,7 +2139,6 @@ virtio_dev_tx_packed(struct virtio_net *dev,
> if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
> &pkts[pkt_idx]))
> break;
> - vhost_flush_dequeue_packed(dev, vq);
> pkt_idx++;
> remained--;
>
> @@ -2182,15 +2147,8 @@ virtio_dev_tx_packed(struct virtio_net *dev,
> if (vq->shadow_used_idx) {
> do_data_copy_dequeue(vq);
>
> - if (remained && !next_desc_is_avail(vq)) {
> - /*
> - * The guest may be waiting to TX some buffers to
> - * enqueue more to avoid bufferfloat, so we try to
> - * reduce latency here.
> - */
> - vhost_flush_dequeue_shadow_packed(dev, vq);
> - vhost_vring_call_packed(dev, vq);
> - }
> + vhost_flush_dequeue_shadow_packed(dev, vq);
> + vhost_vring_call_packed(dev, vq);
> }
>
> return pkt_idx;
> --
> 2.17.1
On 4/1/20 11:29 PM, Marvin Liu wrote: > Defer shadow ring update will help overall throughput when frontend > much slower than backend. But that is not all the cases we faced now. > In case like ovs-dpdk + dpdk virtio user, frontend will much faster > than backend. Frontend may not be able to collect available descs when > shadow update is deferred. Thus will harm RFC2544 performance. I don't understand this comment. What is the difference in terms of performance between Qemu + Virtio PMD and Virtio-User PMD, as the datapath is the same? > Solution is just remove deferred shadow update, which will help RFC2544 > and fix potential issue with virtio net driver. What is the potential issue? Maxime
> -----Original Message----- > From: Maxime Coquelin <maxime.coquelin@redhat.com> > Sent: Wednesday, April 15, 2020 10:16 PM > To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>; > Wang, Zhihong <zhihong.wang@intel.com>; eperezma@redhat.com > Cc: dev@dpdk.org > Subject: Re: [PATCH] vhost: remove deferred shadow update > > > > On 4/1/20 11:29 PM, Marvin Liu wrote: > > Defer shadow ring update will help overall throughput when frontend > > much slower than backend. But that is not all the cases we faced now. > > In case like ovs-dpdk + dpdk virtio user, frontend will much faster > > than backend. Frontend may not be able to collect available descs when > > shadow update is deferred. Thus will harm RFC2544 performance. > > I don't understand this comment. What is the difference in term of > performance between Qemu + Virtio PMD and Virtio-User PMD, as the > datapath is the same? > Hi Maxime, The statement is for the different situations between virtio-net + vhost pmd and virtio-user + vhost pmd in ovs. When combination is virtio-user + vhost pmd in ovs, frontend will be much faster than backend. Defer used ring update won't give benefit when requiring zero packet loss. Regards, Marvin > > Solution is just remove deferred shadow update, which will help RFC2544 > > and fix potential issue with virtio net driver. > > What is the potential issue? > > Maxime It is napi stops issue which has been fixed by Eugenio.
Hi Marvin, On 4/15/20 4:55 PM, Liu, Yong wrote: > > >> -----Original Message----- >> From: Maxime Coquelin <maxime.coquelin@redhat.com> >> Sent: Wednesday, April 15, 2020 10:16 PM >> To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>; >> Wang, Zhihong <zhihong.wang@intel.com>; eperezma@redhat.com >> Cc: dev@dpdk.org >> Subject: Re: [PATCH] vhost: remove deferred shadow update >> >> >> >> On 4/1/20 11:29 PM, Marvin Liu wrote: >>> Defer shadow ring update will help overall throughput when frontend >>> much slower than backend. But that is not all the cases we faced now. >>> In case like ovs-dpdk + dpdk virtio user, frontend will much faster >>> than backend. Frontend may not be able to collect available descs when >>> shadow update is deferred. Thus will harm RFC2544 performance. >> >> I don't understand this comment. What is the difference in term of >> performance between Qemu + Virtio PMD and Virtio-User PMD, as the >> datapath is the same? >> > > Hi Maxime, > The statement is for the different situations between virtio-net + vhost pmd and virtio-user + vhost pmd in ovs. > When combination is virtio-user + vhost pmd in ovs, frontend will be much faster than backend. Defer used ring update won't give benefit when requiring zero packet loss. Ok, so you mean Virtio PMD vs. Virtio-net kernel driver. Regarding who is faster between Virtio PMD and Vhost PMD, it actually depends on what the applications using them are doing. If you have OVS on host + testpmd on guest doing IO fowarding, then of course the frontent is much faster. But if you have testpmd IO forward on host + tespmd MACSWAP forward in guest, then the frontend could be slower. That looks like a benchmark optimization only. > Regards, > Marvin > >>> Solution is just remove deferred shadow update, which will help RFC2544 >>> and fix potential issue with virtio net driver. >> >> What is the potential issue? >> >> Maxime > > It is napi stops issue which has been fixed by Eugenio. 
OK, then I would suggest changing the patch title to: "vhost: fix shadow update" Then make the commit message explicitly point to Eugenio's bug, and tag it with the proper Fixes tag, so that the patch gets backported to 19.11 LTS. Thanks, Maxime
> -----Original Message----- > From: Maxime Coquelin <maxime.coquelin@redhat.com> > Sent: Wednesday, April 15, 2020 11:04 PM > To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>; > Wang, Zhihong <zhihong.wang@intel.com>; eperezma@redhat.com > Cc: dev@dpdk.org > Subject: Re: [PATCH] vhost: remove deferred shadow update > > Hi Marvin, > > On 4/15/20 4:55 PM, Liu, Yong wrote: > > > > > >> -----Original Message----- > >> From: Maxime Coquelin <maxime.coquelin@redhat.com> > >> Sent: Wednesday, April 15, 2020 10:16 PM > >> To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong > <xiaolong.ye@intel.com>; > >> Wang, Zhihong <zhihong.wang@intel.com>; eperezma@redhat.com > >> Cc: dev@dpdk.org > >> Subject: Re: [PATCH] vhost: remove deferred shadow update > >> > >> > >> > >> On 4/1/20 11:29 PM, Marvin Liu wrote: > >>> Defer shadow ring update will help overall throughput when frontend > >>> much slower than backend. But that is not all the cases we faced now. > >>> In case like ovs-dpdk + dpdk virtio user, frontend will much faster > >>> than backend. Frontend may not be able to collect available descs > when > >>> shadow update is deferred. Thus will harm RFC2544 performance. > >> > >> I don't understand this comment. What is the difference in term of > >> performance between Qemu + Virtio PMD and Virtio-User PMD, as the > >> datapath is the same? > >> > > > > Hi Maxime, > > The statement is for the different situations between virtio-net + vhost > pmd and virtio-user + vhost pmd in ovs. > > When combination is virtio-user + vhost pmd in ovs, frontend will be > much faster than backend. Defer used ring update won't give benefit when > requiring zero packet loss. > > Ok, so you mean Virtio PMD vs. Virtio-net kernel driver. > > Regarding who is faster between Virtio PMD and Vhost PMD, it actually > depends on what the applications using them are doing. 
> > If you have OVS on host + testpmd on guest doing IO fowarding, then of > course the frontent is much faster. > > But if you have testpmd IO forward on host + tespmd MACSWAP forward in > guest, then the frontend could be slower. > > That looks like a benchmark optimization only. > Maxime, IMHO, it will be more like performance bug fix. Defer shadow ring update method brings performance issue in certain case. Thanks, Marvin > > Regards, > > Marvin > > > >>> Solution is just remove deferred shadow update, which will help > RFC2544 > >>> and fix potential issue with virtio net driver. > >> > >> What is the potential issue? > >> > >> Maxime > > > > It is napi stops issue which has been fixed by Eugenio. > > OK, then I would suggest to change the patch title to: > "vhost: fix shadow update" > > Then explicit the commit message to point to Eugenio's bug, and tag it > with the proper Fixes tag, so that the patch gets backported to 19.11 > LTS. > Thanks, will do it in next version. > Thanks, > Maxime
Defer shadow ring update introduces functional issue which has been described in Eugenio's fix patch. The current implementation of vhost_net in packed vring tries to fill the shadow vector before send any actual changes to the guest. While this can be beneficial for the throughput, it conflicts with some bufferfloats methods like the linux kernel napi, that stops transmitting packets if there are too much bytes/buffers in the driver. It also introduces performance issue when frontend run much faster than backend. Frontend may not be able to collect available descs when shadow update is deferred. That will harm RFC2544 throughput. Appropriate choice is to remove deferred shadowed update method. Now shadowed used descs are flushed at the end of dequeue function. Fixes: 31d6c6a5b820 ("vhost: optimize packed ring dequeue") Cc: stable@dpdk.org Signed-off-by: Marvin Liu <yong.liu@intel.com> Tested-by: Wang, Yinan <yinan.wang@intel.com> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 210415904..4a7531943 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -382,25 +382,6 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev, } } -static __rte_always_inline void -vhost_flush_dequeue_packed(struct virtio_net *dev, - struct vhost_virtqueue *vq) -{ - int shadow_count; - if (!vq->shadow_used_idx) - return; - - shadow_count = vq->last_used_idx - vq->shadow_last_used_idx; - if (shadow_count <= 0) - shadow_count += vq->size; - - if ((uint32_t)shadow_count >= (vq->size - MAX_PKT_BURST)) { - do_data_copy_dequeue(vq); - vhost_flush_dequeue_shadow_packed(dev, vq); - vhost_vring_call_packed(dev, vq); - } -} - /* avoid write operation when necessary, to lessen cache issues */ #define ASSIGN_UNLESS_EQUAL(var, val) do { \ if ((var) != (val)) \ @@ -2133,20 +2114,6 @@ virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, return pkt_idx; } -static __rte_always_inline bool -next_desc_is_avail(const struct vhost_virtqueue *vq) 
-{ - bool wrap_counter = vq->avail_wrap_counter; - uint16_t next_used_idx = vq->last_used_idx + 1; - - if (next_used_idx >= vq->size) { - next_used_idx -= vq->size; - wrap_counter ^= 1; - } - - return desc_is_avail(&vq->desc_packed[next_used_idx], wrap_counter); -} - static __rte_noinline uint16_t virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -2163,7 +2130,6 @@ virtio_dev_tx_packed(struct virtio_net *dev, if (remained >= PACKED_BATCH_SIZE) { if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool, &pkts[pkt_idx])) { - vhost_flush_dequeue_packed(dev, vq); pkt_idx += PACKED_BATCH_SIZE; remained -= PACKED_BATCH_SIZE; continue; @@ -2173,7 +2139,6 @@ virtio_dev_tx_packed(struct virtio_net *dev, if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, &pkts[pkt_idx])) break; - vhost_flush_dequeue_packed(dev, vq); pkt_idx++; remained--; @@ -2182,15 +2147,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, if (vq->shadow_used_idx) { do_data_copy_dequeue(vq); - if (remained && !next_desc_is_avail(vq)) { - /* - * The guest may be waiting to TX some buffers to - * enqueue more to avoid bufferfloat, so we try to - * reduce latency here. - */ - vhost_flush_dequeue_shadow_packed(dev, vq); - vhost_vring_call_packed(dev, vq); - } + vhost_flush_dequeue_shadow_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } return pkt_idx; -- 2.17.1
On 4/17/20 4:39 AM, Marvin Liu wrote:
> Deferring the shadow ring update introduces a functional issue, which has
> been described in Eugenio's fix patch.
>
> The current implementation of vhost_net in packed vring tries to fill
> the shadow vector before sending any actual changes to the guest. While
> this can be beneficial for throughput, it conflicts with some
> bufferbloat-avoidance methods like the Linux kernel NAPI, which stops
> transmitting packets if there are too many bytes/buffers in the
> driver.
>
> It also introduces a performance issue when the frontend runs much faster
> than the backend. The frontend may not be able to collect available descs
> when the shadow update is deferred. That will harm RFC2544 throughput.
>
> The appropriate choice is to remove the deferred shadow update method.
> Now shadowed used descs are flushed at the end of the dequeue function.
>
> Fixes: 31d6c6a5b820 ("vhost: optimize packed ring dequeue")
> Cc: stable@dpdk.org
>
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> Tested-by: Wang, Yinan <yinan.wang@intel.com>
>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
On 4/17/20 4:39 AM, Marvin Liu wrote:
> Deferring the shadow ring update introduces a functional issue, which has
> been described in Eugenio's fix patch.
>
> The current implementation of vhost_net in packed vring tries to fill
> the shadow vector before sending any actual changes to the guest. While
> this can be beneficial for throughput, it conflicts with some
> bufferbloat-avoidance methods like the Linux kernel NAPI, which stops
> transmitting packets if there are too many bytes/buffers in the
> driver.
>
> It also introduces a performance issue when the frontend runs much faster
> than the backend. The frontend may not be able to collect available descs
> when the shadow update is deferred. That will harm RFC2544 throughput.
>
> The appropriate choice is to remove the deferred shadow update method.
> Now shadowed used descs are flushed at the end of the dequeue function.
>
> Fixes: 31d6c6a5b820 ("vhost: optimize packed ring dequeue")
> Cc: stable@dpdk.org
>
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> Tested-by: Wang, Yinan <yinan.wang@intel.com>
Applied to dpdk-next-virtio/master
Thanks,
Maxime