DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring
@ 2018-12-20 16:47 Maxime Coquelin
  2018-12-20 18:19 ` Michael S. Tsirkin
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Maxime Coquelin @ 2018-12-20 16:47 UTC (permalink / raw)
  To: dev, i.maximets, tiwei.bie, zhihong.wang, jfreiman, mst; +Cc: Maxime Coquelin

Instead of writing back descriptor chains in order, let's
write the first chain's flags last in order to improve batching.

Also, move the write barrier into the logging cache sync, so that
it is done only when logging is enabled. It means there is now
one more barrier for split ring when logging is enabled.

With the kernel's pktgen benchmark, a ~3% performance gain is measured.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.h      |  2 ++
 lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3b3265c4b..7d1d8a308 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -457,6 +457,8 @@ vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
 		   !dev->log_base))
 		return;
 
+	rte_smp_wmb();
+
 	log_base = (unsigned long *)(uintptr_t)dev->log_base;
 
 	/*
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 8c657a101..02c1fd3a4 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -97,6 +97,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
 {
 	int i;
 	uint16_t used_idx = vq->last_used_idx;
+	uint16_t head_idx = vq->last_used_idx;
+	uint16_t head_flags = 0;
 
 	/* Split loop in two to save memory barriers */
 	for (i = 0; i < vq->shadow_used_idx; i++) {
@@ -126,12 +128,17 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
 			flags &= ~VRING_DESC_F_AVAIL;
 		}
 
-		vq->desc_packed[vq->last_used_idx].flags = flags;
+		if (i > 0) {
+			vq->desc_packed[vq->last_used_idx].flags = flags;
 
-		vhost_log_cache_used_vring(dev, vq,
+			vhost_log_cache_used_vring(dev, vq,
 					vq->last_used_idx *
 					sizeof(struct vring_packed_desc),
 					sizeof(struct vring_packed_desc));
+		} else {
+			head_idx = vq->last_used_idx;
+			head_flags = flags;
+		}
 
 		vq->last_used_idx += vq->shadow_used_packed[i].count;
 		if (vq->last_used_idx >= vq->size) {
@@ -140,7 +147,13 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
 		}
 	}
 
-	rte_smp_wmb();
+	vq->desc_packed[head_idx].flags = head_flags;
+
+	vhost_log_cache_used_vring(dev, vq,
+				head_idx *
+				sizeof(struct vring_packed_desc),
+				sizeof(struct vring_packed_desc));
+
 	vq->shadow_used_idx = 0;
 	vhost_log_cache_sync(dev, vq);
 }
-- 
2.17.2

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring
  2018-12-20 16:47 [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring Maxime Coquelin
@ 2018-12-20 18:19 ` Michael S. Tsirkin
  2018-12-21  2:28 ` Tiwei Bie
  2018-12-21  9:20 ` Maxime Coquelin
  2 siblings, 0 replies; 4+ messages in thread
From: Michael S. Tsirkin @ 2018-12-20 18:19 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, i.maximets, tiwei.bie, zhihong.wang, jfreiman

On Thu, Dec 20, 2018 at 05:47:55PM +0100, Maxime Coquelin wrote:
> Instead of writing back descriptors chains in order, let's
> write the first chain flags last in order to improve batching.
> 
> Also, move the write barrier in logging cache sync, so that it
> is done only when logging is enabled. It means there is now
> one more barrier for split ring when logging is enabled.
> 
> With Kernel's pktgen benchmark, ~3% performance gain is measured.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Acked-by: Michael S. Tsirkin <mst@redhat.com>


> ---
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++---
>  2 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 3b3265c4b..7d1d8a308 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -457,6 +457,8 @@ vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
>  		   !dev->log_base))
>  		return;
>  
> +	rte_smp_wmb();
> +
>  	log_base = (unsigned long *)(uintptr_t)dev->log_base;
>  
>  	/*
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 8c657a101..02c1fd3a4 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -97,6 +97,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
>  {
>  	int i;
>  	uint16_t used_idx = vq->last_used_idx;
> +	uint16_t head_idx = vq->last_used_idx;
> +	uint16_t head_flags = 0;
>  
>  	/* Split loop in two to save memory barriers */
>  	for (i = 0; i < vq->shadow_used_idx; i++) {
> @@ -126,12 +128,17 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
>  			flags &= ~VRING_DESC_F_AVAIL;
>  		}
>  
> -		vq->desc_packed[vq->last_used_idx].flags = flags;
> +		if (i > 0) {
> +			vq->desc_packed[vq->last_used_idx].flags = flags;
>  
> -		vhost_log_cache_used_vring(dev, vq,
> +			vhost_log_cache_used_vring(dev, vq,
>  					vq->last_used_idx *
>  					sizeof(struct vring_packed_desc),
>  					sizeof(struct vring_packed_desc));
> +		} else {
> +			head_idx = vq->last_used_idx;
> +			head_flags = flags;
> +		}
>  
>  		vq->last_used_idx += vq->shadow_used_packed[i].count;
>  		if (vq->last_used_idx >= vq->size) {
> @@ -140,7 +147,13 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
>  		}
>  	}
>  
> -	rte_smp_wmb();
> +	vq->desc_packed[head_idx].flags = head_flags;
> +
> +	vhost_log_cache_used_vring(dev, vq,
> +				head_idx *
> +				sizeof(struct vring_packed_desc),
> +				sizeof(struct vring_packed_desc));
> +
>  	vq->shadow_used_idx = 0;
>  	vhost_log_cache_sync(dev, vq);
>  }
> -- 
> 2.17.2

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring
  2018-12-20 16:47 [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring Maxime Coquelin
  2018-12-20 18:19 ` Michael S. Tsirkin
@ 2018-12-21  2:28 ` Tiwei Bie
  2018-12-21  9:20 ` Maxime Coquelin
  2 siblings, 0 replies; 4+ messages in thread
From: Tiwei Bie @ 2018-12-21  2:28 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, i.maximets, zhihong.wang, jfreiman, mst

On Thu, Dec 20, 2018 at 05:47:55PM +0100, Maxime Coquelin wrote:
> Instead of writing back descriptors chains in order, let's
> write the first chain flags last in order to improve batching.
> 
> Also, move the write barrier in logging cache sync, so that it
> is done only when logging is enabled. It means there is now
> one more barrier for split ring when logging is enabled.
> 
> With Kernel's pktgen benchmark, ~3% performance gain is measured.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++---
>  2 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 3b3265c4b..7d1d8a308 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -457,6 +457,8 @@ vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
>  		   !dev->log_base))
>  		return;
>  
> +	rte_smp_wmb();

It would be better to also remove the comments below (which can
be done when applying the patch):

https://github.com/DPDK/dpdk/blob/dafc04c15174/lib/librte_vhost/vhost.h#L461-L464

Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>

> +
>  	log_base = (unsigned long *)(uintptr_t)dev->log_base;
>  
>  	/*
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 8c657a101..02c1fd3a4 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -97,6 +97,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
>  {
>  	int i;
>  	uint16_t used_idx = vq->last_used_idx;
> +	uint16_t head_idx = vq->last_used_idx;
> +	uint16_t head_flags = 0;
>  
>  	/* Split loop in two to save memory barriers */
>  	for (i = 0; i < vq->shadow_used_idx; i++) {
> @@ -126,12 +128,17 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
>  			flags &= ~VRING_DESC_F_AVAIL;
>  		}
>  
> -		vq->desc_packed[vq->last_used_idx].flags = flags;
> +		if (i > 0) {
> +			vq->desc_packed[vq->last_used_idx].flags = flags;
>  
> -		vhost_log_cache_used_vring(dev, vq,
> +			vhost_log_cache_used_vring(dev, vq,
>  					vq->last_used_idx *
>  					sizeof(struct vring_packed_desc),
>  					sizeof(struct vring_packed_desc));
> +		} else {
> +			head_idx = vq->last_used_idx;
> +			head_flags = flags;
> +		}
>  
>  		vq->last_used_idx += vq->shadow_used_packed[i].count;
>  		if (vq->last_used_idx >= vq->size) {
> @@ -140,7 +147,13 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
>  		}
>  	}
>  
> -	rte_smp_wmb();
> +	vq->desc_packed[head_idx].flags = head_flags;
> +
> +	vhost_log_cache_used_vring(dev, vq,
> +				head_idx *
> +				sizeof(struct vring_packed_desc),
> +				sizeof(struct vring_packed_desc));
> +
>  	vq->shadow_used_idx = 0;
>  	vhost_log_cache_sync(dev, vq);
>  }
> -- 
> 2.17.2
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring
  2018-12-20 16:47 [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring Maxime Coquelin
  2018-12-20 18:19 ` Michael S. Tsirkin
  2018-12-21  2:28 ` Tiwei Bie
@ 2018-12-21  9:20 ` Maxime Coquelin
  2 siblings, 0 replies; 4+ messages in thread
From: Maxime Coquelin @ 2018-12-21  9:20 UTC (permalink / raw)
  To: dev, i.maximets, tiwei.bie, zhihong.wang, jfreiman, mst



On 12/20/18 5:47 PM, Maxime Coquelin wrote:
> Instead of writing back descriptors chains in order, let's
> write the first chain flags last in order to improve batching.
> 
> Also, move the write barrier in logging cache sync, so that it
> is done only when logging is enabled. It means there is now
> one more barrier for split ring when logging is enabled.
> 
> With Kernel's pktgen benchmark, ~3% performance gain is measured.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>   lib/librte_vhost/vhost.h      |  2 ++
>   lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++---
>   2 files changed, 18 insertions(+), 3 deletions(-)

Applied to dpdk-next-virtio

Maxime

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-12-21  9:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-20 16:47 [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring Maxime Coquelin
2018-12-20 18:19 ` Michael S. Tsirkin
2018-12-21  2:28 ` Tiwei Bie
2018-12-21  9:20 ` Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).