DPDK patches and discussions
* [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors.
@ 2015-05-04  6:26 Ouyang Changchun
  2015-05-12 10:00 ` Thomas Monjalon
                   ` (2 more replies)
  0 siblings, 3 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-04  6:26 UTC (permalink / raw)
  To: dev

Vring enqueue needs to consider 2 cases:
 1. Vring descriptors chained together: the first one is for the virtio header, the rest are for the real data;
 2. Only one descriptor: the virtio header and the real data share one single descriptor;

The same applies to vring dequeue.
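
For illustration only (not part of the patch; sketch_desc and SKETCH_VHOST_HLEN are simplified stand-ins for the real vring_desc and vq->vhost_hlen), this is roughly how the receive path can tell the two layouts apart from the first descriptor:

#include <stdint.h>

#define SKETCH_DESC_F_NEXT 0x1   /* stands in for VRING_DESC_F_NEXT */
#define SKETCH_VHOST_HLEN  12    /* stands in for vq->vhost_hlen */

/* Simplified stand-in for a vring descriptor; illustration only. */
struct sketch_desc {
	uint64_t addr;   /* guest physical address of the buffer */
	uint32_t len;    /* buffer length */
	uint16_t flags;  /* SKETCH_DESC_F_NEXT marks a chained descriptor */
	uint16_t next;   /* index of the next descriptor when chained */
};

/*
 * Report where packet data starts for a receive buffer whose head
 * descriptor is 'head': case 1 puts the data at offset 0 of the next
 * descriptor, case 2 puts it right after the header in the same one.
 */
static uint32_t sketch_data_offset(const struct sketch_desc *descs,
				   uint16_t head, uint16_t *data_idx)
{
	if (descs[head].flags & SKETCH_DESC_F_NEXT) {
		*data_idx = descs[head].next;  /* case 1: header has its own descriptor */
		return 0;
	}
	*data_idx = head;                      /* case 2: header and data share the descriptor */
	return SKETCH_VHOST_HLEN;
}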

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 60 +++++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 510ffe8..3135883 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0;
+		uint32_t data_len, len_to_cpy;
+		uint8_t plus_hdr = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
@@ -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
 			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			plus_hdr = 1;
 		}
 
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len, desc->len);
+		do {
+			if (len_to_cpy > 0) {
+				/* Copy mbuf data to buffer */
+				rte_memcpy((void *)(uintptr_t)buff_addr,
+					(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+					len_to_cpy);
+				PRINT_PACKET(dev, (uintptr_t)buff_addr,
+					len_to_cpy, 0);
+
+				desc->len = len_to_cpy + (plus_hdr ? vq->vhost_hlen : 0);
+				offset += len_to_cpy;
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					len_to_cpy = RTE_MIN(data_len - offset, desc->len);
+				} else
+					break;
+			} else {
+				desc->len = 0;
+				if (desc->flags & VRING_DESC_F_NEXT)
+                                        desc = &vq->desc[desc->next];
+				else
+					break;
+			}
+		} while (1);
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
-
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							offset + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
@@ -583,7 +606,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -602,8 +632,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2


* Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors.
  2015-05-04  6:26 [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-05-12 10:00 ` Thomas Monjalon
  2015-05-18  9:39 ` Xie, Huawei
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2 siblings, 0 replies; 52+ messages in thread
From: Thomas Monjalon @ 2015-05-12 10:00 UTC (permalink / raw)
  To: Ouyang Changchun; +Cc: dev

Hi Changchun,

Why does the title begin with virtio for a patch on vhost?
Could you rephrase it in a positive form?

2015-05-04 14:26, Ouyang Changchun:
> Vring enqueue need consider the 2 cases:
>  1. Vring descriptors chained together, the first one is for virtio header, the rest are for real data;
>  2. Only one descriptor, virtio header and real data share one single descriptor;

Please explain what was not working before.

> So does vring dequeue.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 60 +++++++++++++++++++++++++++++++------------
>  1 file changed, 44 insertions(+), 16 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 510ffe8..3135883 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0;
> +		uint32_t data_len, len_to_cpy;
> +		uint8_t plus_hdr = 0;

plus_hdr is not very meaningful to me

I'm not a vhost expert, so I won't review the rest.
If nobody comments on it in the coming days, it will be accepted.


* Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors.
  2015-05-04  6:26 [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-05-12 10:00 ` Thomas Monjalon
@ 2015-05-18  9:39 ` Xie, Huawei
  2015-05-18 13:23   ` Ouyang, Changchun
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2 siblings, 1 reply; 52+ messages in thread
From: Xie, Huawei @ 2015-05-18  9:39 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

On 5/4/2015 2:27 PM, Ouyang Changchun wrote:
> Vring enqueue need consider the 2 cases:
>  1. Vring descriptors chained together, the first one is for virtio header, the rest are for real data;
>  2. Only one descriptor, virtio header and real data share one single descriptor;
>
> So does vring dequeue.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 60 +++++++++++++++++++++++++++++++------------
>  1 file changed, 44 insertions(+), 16 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 510ffe8..3135883 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0;
> +		uint32_t data_len, len_to_cpy;
> +		uint8_t plus_hdr = 0;
> +
>  		/* Get descriptor from available ring */
>  		desc = &vq->desc[head[packet_success]];
>  
> @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  		/* Copy virtio_hdr to packet and increment buffer address */
>  		buff_hdr_addr = buff_addr;
> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>  
>  		/*
>  		 * If the descriptors are chained the header and data are
> @@ -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  			desc = &vq->desc[desc->next];
>  			/* Buffer address translation. */
>  			buff_addr = gpa_to_vva(dev, desc->addr);
> -			desc->len = rte_pktmbuf_data_len(buff);
>  		} else {
>  			buff_addr += vq->vhost_hlen;
> -			desc->len = packet_len;
> +			plus_hdr = 1;
>  		}
>  
> +		data_len = rte_pktmbuf_data_len(buff);
> +		len_to_cpy = RTE_MIN(data_len, desc->len);
> +		do {
> +			if (len_to_cpy > 0) {
> +				/* Copy mbuf data to buffer */
> +				rte_memcpy((void *)(uintptr_t)buff_addr,
> +					(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> +					len_to_cpy);
> +				PRINT_PACKET(dev, (uintptr_t)buff_addr,
> +					len_to_cpy, 0);
> +
> +				desc->len = len_to_cpy + (plus_hdr ? vq->vhost_hlen : 0);

Do we really need to rewrite desc->len again and again?  At most, only the
last descriptor's desc->len could possibly need to be changed.

> +				offset += len_to_cpy;
> +				if (desc->flags & VRING_DESC_F_NEXT) {
> +					desc = &vq->desc[desc->next];
> +					buff_addr = gpa_to_vva(dev, desc->addr);
> +					len_to_cpy = RTE_MIN(data_len - offset, desc->len);
> +				} else
> +					break;

There are still two issues here.
a) If the data can't be fully copied to the chain of guest buffers, we
shouldn't do any copy at all.
b) Scattered mbufs aren't considered.
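
To illustrate point (a) — a sketch with simplified types, not a proposed patch (the v2 series' scatter RX path later does an equivalent pre-check with update_secure_len): sum the writable capacity of the chain up front and drop the packet when it does not fit, instead of copying part of it.

#include <stdint.h>

#define SKETCH_DESC_F_NEXT 0x1    /* stands in for VRING_DESC_F_NEXT */

struct sketch_desc {              /* simplified stand-in for a vring descriptor */
	uint64_t addr;
	uint32_t len;
	uint16_t flags;
	uint16_t next;
};

/* Total writable bytes in a descriptor chain starting at 'head'. */
static uint32_t sketch_chain_capacity(const struct sketch_desc *descs, uint16_t head)
{
	const struct sketch_desc *d = &descs[head];
	uint32_t cap = d->len;

	while (d->flags & SKETCH_DESC_F_NEXT) {
		d = &descs[d->next];
		cap += d->len;
	}
	return cap;
}

/*
 * A caller would then check, before any copy:
 *	if (data_len + vhost_hlen > sketch_chain_capacity(descs, head))
 *		mark the packet as dropped (used->len = 0) and skip the copy.
 */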

> +			} else {
> +				desc->len = 0;
> +				if (desc->flags & VRING_DESC_F_NEXT)
> +                                        desc = &vq->desc[desc->next];
> +				else
> +					break;
> +			}
> +		} while (1);
> +
>  		/* Update used ring with desc information */
>  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>  							head[packet_success];
> -		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
> -
> -		/* Copy mbuf data to buffer */
> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> -		rte_memcpy((void *)(uintptr_t)buff_addr,
> -			rte_pktmbuf_mtod(buff, const void *),
> -			rte_pktmbuf_data_len(buff));
> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -			rte_pktmbuf_data_len(buff), 0);
> +		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +							offset + vq->vhost_hlen;
>  
>  		res_cur_idx++;
>  		packet_success++;
> @@ -583,7 +606,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		desc = &vq->desc[head[entry_success]];
>  
>  		/* Discard first buffer as it is the virtio header */
> -		desc = &vq->desc[desc->next];
> +		if (desc->flags & VRING_DESC_F_NEXT) {
> +			desc = &vq->desc[desc->next];
> +			vb_offset = 0;
> +			vb_avail = desc->len;
> +		} else {
> +			vb_offset = vq->vhost_hlen;
> +			vb_avail = desc->len - vb_offset;
> +		}
>  
>  		/* Buffer address translation. */
>  		vb_addr = gpa_to_vva(dev, desc->addr);
> @@ -602,8 +632,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		vq->used->ring[used_idx].id = head[entry_success];
>  		vq->used->ring[used_idx].len = 0;
>  
> -		vb_offset = 0;
> -		vb_avail = desc->len;
>  		/* Allocate an mbuf and populate the structure. */
>  		m = rte_pktmbuf_alloc(mbuf_pool);
>  		if (unlikely(m == NULL)) {



* Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors.
  2015-05-18  9:39 ` Xie, Huawei
@ 2015-05-18 13:23   ` Ouyang, Changchun
  2015-05-20  5:26     ` Xie, Huawei
  0 siblings, 1 reply; 52+ messages in thread
From: Ouyang, Changchun @ 2015-05-18 13:23 UTC (permalink / raw)
  To: Xie, Huawei, dev

Hi Huawei,

> -----Original Message-----
> From: Xie, Huawei
> Sent: Monday, May 18, 2015 5:39 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle
> chained vring descriptors.
> 
> On 5/4/2015 2:27 PM, Ouyang Changchun wrote:
> > Vring enqueue need consider the 2 cases:
> >  1. Vring descriptors chained together, the first one is for virtio
> > header, the rest are for real data;  2. Only one descriptor, virtio
> > header and real data share one single descriptor;
> >
> > So does vring dequeue.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 60
> > +++++++++++++++++++++++++++++++------------
> >  1 file changed, 44 insertions(+), 16 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 510ffe8..3135883 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0;
> > +		uint32_t data_len, len_to_cpy;
> > +		uint8_t plus_hdr = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are @@
> > -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > -			desc->len = rte_pktmbuf_data_len(buff);
> >  		} else {
> >  			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			plus_hdr = 1;
> >  		}
> >
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len, desc->len);
> > +		do {
> > +			if (len_to_cpy > 0) {
> > +				/* Copy mbuf data to buffer */
> > +				rte_memcpy((void *)(uintptr_t)buff_addr,
> > +					(const void
> *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> > +					len_to_cpy);
> > +				PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > +					len_to_cpy, 0);
> > +
> > +				desc->len = len_to_cpy + (plus_hdr ? vq-
> >vhost_hlen : 0);
> 
> Do we really need to rewrite the desc->len again and again?  At least we only
> have the possibility to change the value of desc->len of the last descriptor.

Well, I think we need to change each descriptor's len in the chain here.
If we aggregate all the lengths into the last descriptor's len, the length may exceed its original len,
e.g. with 8 descriptors (each with a len of 1024) chained to receive an 8K packet, the last descriptor's len
would be 8K and all the other descriptors would be 0; I don't think that situation makes sense.
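
Laid out explicitly, under the reading described above (8 chained descriptors of 1024 bytes each receiving an 8K packet), the two policies give:

  per-descriptor updates:   desc[0..7].len = 1024 each; used->len reports the total
  aggregate into the last:  desc[0..6].len = 0, desc[7].len = 8192, which exceeds that descriptor's 1024-byte buffer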

> 
> > +				offset += len_to_cpy;
> > +				if (desc->flags & VRING_DESC_F_NEXT) {
> > +					desc = &vq->desc[desc->next];
> > +					buff_addr = gpa_to_vva(dev, desc-
> >addr);
> > +					len_to_cpy = RTE_MIN(data_len -
> offset, desc->len);
> > +				} else
> > +					break;
> 
> Still there are two issues here.
> a) If the data couldn't be fully copied to chain of guest buffers, we shouldn't
> do any copy.

Why is copying no data at all better than the current implementation?

> b) scatter mbuf isn't considered.

If we also consider scattered mbufs here, then this function will have exactly the same logic as mergeable_rx.
Do you want to remove this function entirely and just keep the mergeable rx function for all cases?

Changchun


* Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors.
  2015-05-18 13:23   ` Ouyang, Changchun
@ 2015-05-20  5:26     ` Xie, Huawei
  0 siblings, 0 replies; 52+ messages in thread
From: Xie, Huawei @ 2015-05-20  5:26 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev

On 5/18/2015 9:23 PM, Ouyang, Changchun wrote:
> Hi Huawei,
>
>> -----Original Message-----
>> From: Xie, Huawei
>> Sent: Monday, May 18, 2015 5:39 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle
>> chained vring descriptors.
>>
>> On 5/4/2015 2:27 PM, Ouyang Changchun wrote:
>>> Vring enqueue need consider the 2 cases:
>>>  1. Vring descriptors chained together, the first one is for virtio
>>> header, the rest are for real data;  2. Only one descriptor, virtio
>>> header and real data share one single descriptor;
>>>
>>> So does vring dequeue.
>>>
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>  lib/librte_vhost/vhost_rxtx.c | 60
>>> +++++++++++++++++++++++++++++++------------
>>>  1 file changed, 44 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/vhost_rxtx.c
>>> b/lib/librte_vhost/vhost_rxtx.c index 510ffe8..3135883 100644
>>> --- a/lib/librte_vhost/vhost_rxtx.c
>>> +++ b/lib/librte_vhost/vhost_rxtx.c
>>> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
>> queue_id,
>>>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>>>  	uint64_t buff_addr = 0;
>>>  	uint64_t buff_hdr_addr = 0;
>>> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
>>> +	uint32_t head[MAX_PKT_BURST];
>>>  	uint32_t head_idx, packet_success = 0;
>>>  	uint16_t avail_idx, res_cur_idx;
>>>  	uint16_t res_base_idx, res_end_idx;
>>> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
>> queue_id,
>>>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>>>
>>>  	while (res_cur_idx != res_end_idx) {
>>> +		uint32_t offset = 0;
>>> +		uint32_t data_len, len_to_cpy;
>>> +		uint8_t plus_hdr = 0;
>>> +
>>>  		/* Get descriptor from available ring */
>>>  		desc = &vq->desc[head[packet_success]];
>>>
>>> @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
>>> queue_id,
>>>
>>>  		/* Copy virtio_hdr to packet and increment buffer address */
>>>  		buff_hdr_addr = buff_addr;
>>> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>>>
>>>  		/*
>>>  		 * If the descriptors are chained the header and data are @@
>>> -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
>> queue_id,
>>>  			desc = &vq->desc[desc->next];
>>>  			/* Buffer address translation. */
>>>  			buff_addr = gpa_to_vva(dev, desc->addr);
>>> -			desc->len = rte_pktmbuf_data_len(buff);
>>>  		} else {
>>>  			buff_addr += vq->vhost_hlen;
>>> -			desc->len = packet_len;
>>> +			plus_hdr = 1;
>>>  		}
>>>
>>> +		data_len = rte_pktmbuf_data_len(buff);
>>> +		len_to_cpy = RTE_MIN(data_len, desc->len);
>>> +		do {
>>> +			if (len_to_cpy > 0) {
>>> +				/* Copy mbuf data to buffer */
>>> +				rte_memcpy((void *)(uintptr_t)buff_addr,
>>> +					(const void
>> *)(rte_pktmbuf_mtod(buff, const char *) + offset),
>>> +					len_to_cpy);
>>> +				PRINT_PACKET(dev, (uintptr_t)buff_addr,
>>> +					len_to_cpy, 0);
>>> +
>>> +				desc->len = len_to_cpy + (plus_hdr ? vq-
>>> vhost_hlen : 0);
>> Do we really need to rewrite the desc->len again and again?  At least we only
>> have the possibility to change the value of desc->len of the last descriptor.
> Well, I think we need change each descriptor's len in the chain here,
> If aggregate all len to the last descriptor's len, it is possibly the length will exceed its original len,
> e.g. use 8 descriptor(each has len of 1024) chained to recv a 8K packet, then last descriptor's len
> will be 8K, and all other descriptor is 0, I don't think this situation make sense.  
Let me explain it this way.
Suppose we receive a packet of 350 bytes, and we have a descriptor chain of
10 descs, each 100 bytes in size.
At the very least we don't need to change the len field of the first three descriptors.
Whether we need to change the 4th len field to 50, and the rest of them
to zero, is still an open question (used->len is updated to 350).
We need to check the VIRTIO spec.
>>> +				offset += len_to_cpy;
>>> +				if (desc->flags & VRING_DESC_F_NEXT) {
>>> +					desc = &vq->desc[desc->next];
>>> +					buff_addr = gpa_to_vva(dev, desc-
>>> addr);
>>> +					len_to_cpy = RTE_MIN(data_len -
>> offset, desc->len);
>>> +				} else
>>> +					break;
>> Still there are two issues here.
>> a) If the data couldn't be fully copied to chain of guest buffers, we shouldn't
>> do any copy.
> Why don't copy any data is better than the current implementation?
We don't need to pass part of a packet to the guest.
>
>> b) scatter mbuf isn't considered.
> If we also consider scatter mbuf here, then this function will have exactly same logic with mergeable_rx,
> Do you want to totally remove this function, just keep the mergeable rx function for all cases?
>
> Changchun
>
>
>



* [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue
  2015-05-04  6:26 [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-05-12 10:00 ` Thomas Monjalon
  2015-05-18  9:39 ` Xie, Huawei
@ 2015-05-28 15:16 ` Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
                     ` (5 more replies)
  2 siblings, 6 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-28 15:16 UTC (permalink / raw)
  To: dev

Fix enqueue/dequeue not handling chained vring descriptors;
Remove unnecessary vring descriptor length updates;
Add support for copying scattered mbufs to the vring;

Changchun Ouyang (5):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating
  lib_vhost: Add support copying scattered mbuf to vring

 lib/librte_vhost/vhost_rxtx.c | 287 +++++++++++++++++++++++++++++++-----------
 1 file changed, 216 insertions(+), 71 deletions(-)

-- 
1.8.4.2


* [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
@ 2015-05-28 15:16   ` Ouyang Changchun
  2015-05-31  5:03     ` Xie, Huawei
  2015-05-31  8:40     ` Xie, Huawei
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 2/5] lib_vhost: Refine code style Ouyang Changchun
                     ` (4 subsequent siblings)
  5 siblings, 2 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-28 15:16 UTC (permalink / raw)
  To: dev

Vring enqueue needs to consider 2 cases:
 1. Vring descriptors chained together: the first one is for the virtio header, the rest are for the real
    data; the virtio driver in Linux usually uses this scheme;
 2. Only one descriptor: the virtio header and the real data share one single descriptor; the virtio-net pmd
    uses such a scheme;

The same applies to vring dequeue: it should not assume the vring descriptors are chained or not chained; virtio in
different Linux versions has different behavior, e.g. Fedora 20 uses chained vring descriptors, while
Fedora 21 uses one single vring descriptor for tx.
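
As an illustration of the dequeue-side rule (a sketch with simplified stand-in types, not the patch code), the starting offset and the usable length of the first data descriptor can be derived instead of assumed:

#include <stdint.h>

#define SKETCH_DESC_F_NEXT 0x1    /* stands in for VRING_DESC_F_NEXT */
#define SKETCH_VHOST_HLEN  12     /* stands in for vq->vhost_hlen */

struct sketch_desc {              /* simplified stand-in for a vring descriptor */
	uint64_t addr;
	uint32_t len;
	uint16_t flags;
	uint16_t next;
};

/* Where TX data starts and how much of the first data descriptor is usable. */
static void sketch_tx_start(const struct sketch_desc *descs, uint16_t head,
			    uint16_t *data_idx, uint32_t *offset, uint32_t *avail)
{
	if (descs[head].flags & SKETCH_DESC_F_NEXT) {
		/* Chained layout: the header descriptor is skipped entirely. */
		*data_idx = descs[head].next;
		*offset = 0;
		*avail = descs[*data_idx].len;
	} else {
		/* Shared layout: data follows the header in the same buffer. */
		*data_idx = head;
		*offset = SKETCH_VHOST_HLEN;
		*avail = descs[head].len - SKETCH_VHOST_HLEN;
	}
}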

Changes in v2
  - drop the uncompleted packet
  - refine code logic

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 65 +++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 15 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 4809d32..06ae2df 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0;
+		uint32_t data_len, len_to_cpy;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
@@ -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
 			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			hdr = 1;
 		}
 
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (len_to_cpy > 0) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)buff_addr,
+				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)buff_addr,
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+
+			if (offset == data_len)
+				break;
+
+			if (desc->flags & VRING_DESC_F_NEXT) {
+				desc = &vq->desc[desc->next];
+				buff_addr = gpa_to_vva(dev, desc->addr);
+				len_to_cpy = RTE_MIN(data_len - offset, desc->len);
+			} else {
+				/* Room in vring buffer is not enough */
+				uncompleted_pkt = 1;
+				break;
+			}
+		};
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							offset + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -608,8 +645,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2


* [dpdk-dev] [PATCH v2 2/5] lib_vhost: Refine code style
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-05-28 15:16   ` Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 3/5] lib_vhost: Extract function Ouyang Changchun
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-28 15:16 UTC (permalink / raw)
  To: dev

Remove unnecessary new line.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 06ae2df..a15578b 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -248,8 +248,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * (guest physical addr -> vhost virtual addr)
 	 */
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -267,8 +266,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
@@ -291,8 +289,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
-- 
1.8.4.2


* [dpdk-dev] [PATCH v2 3/5] lib_vhost: Extract function
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 2/5] lib_vhost: Refine code style Ouyang Changchun
@ 2015-05-28 15:16   ` Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 4/5] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-28 15:16 UTC (permalink / raw)
  To: dev

Extract code into 2 common functions:
update_secure_len, which is used to accumulate the buffer len across the vring descriptor chain,
and fill_buf_vec, which is used to fill struct buf_vec.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 79 +++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index a15578b..24de6f4 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -419,6 +419,49 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t idx,
+	uint32_t *secure_len)
+{
+	uint8_t next_desc = 0;
+	uint32_t len = *secure_len;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+}
+
+static inline void __attribute__((always_inline))
+fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*vec_idx = vec_id;
+}
+
 /*
  * This function works for mergeable RX.
  */
@@ -449,7 +492,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		uint16_t need_cnt;
 		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
+		uint16_t i;
 
 		do {
 			/*
@@ -473,18 +516,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 						(res_cur_idx) & (vq->size - 1);
 					uint32_t idx =
 						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
 
+					update_secure_len(vq, idx, &secure_len);
 					res_cur_idx++;
 				}
 			} while (pkt_len > secure_len);
@@ -495,28 +528,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
 		need_cnt = res_cur_idx - res_base_idx;
 
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
+		for (i = 0; i < need_cnt; i++)
+			fill_buf_vec(vq, res_base_idx + i, &vec_idx);
 
 		res_end_idx = res_cur_idx;
 
-- 
1.8.4.2


* [dpdk-dev] [PATCH v2 4/5] lib_vhost: Remove unnecessary vring descriptor length updating
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
                     ` (2 preceding siblings ...)
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 3/5] lib_vhost: Extract function Ouyang Changchun
@ 2015-05-28 15:16   ` Ouyang Changchun
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring Ouyang Changchun
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-28 15:16 UTC (permalink / raw)
  To: dev

Remove the unnecessary vring descriptor length updates; vhost should not change them.
The virtio front end should assign the value of desc.len for both rx and tx.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 24de6f4..bb56ae1 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -135,7 +135,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		 * placed in separate buffers.
 		 */
 		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
@@ -273,7 +272,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	if (vb_avail == 0) {
 		uint32_t desc_idx =
 			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
 
 		if ((vq->desc[desc_idx].flags
 			& VRING_DESC_F_NEXT) == 0) {
@@ -357,7 +355,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 					 */
 					uint32_t desc_idx =
 						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
 
 					if ((vq->desc[desc_idx].flags &
 						VRING_DESC_F_NEXT) == 0) {
@@ -392,26 +389,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id
 					= vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len
 					= entry_len;
-				entry_len = 0;
-				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
-- 
1.8.4.2


* [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
                     ` (3 preceding siblings ...)
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 4/5] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
@ 2015-05-28 15:16   ` Ouyang Changchun
  2015-05-31  9:10     ` Xie, Huawei
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  5 siblings, 1 reply; 52+ messages in thread
From: Ouyang Changchun @ 2015-05-28 15:16 UTC (permalink / raw)
  To: dev

Add support for copying scattered mbufs to the vring, which is done by dev_scatter_rx,
and check the 'next' pointer in the mbuf on the fly to select a suitable function to receive packets.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 116 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 115 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bb56ae1..3086bb4 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,7 +46,8 @@
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * added to the RX queue. This function works when mergeable is disabled and
+ * the mbuf is not scattered.
  */
 static inline uint32_t __attribute__((always_inline))
 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
@@ -447,6 +448,103 @@ fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id, uint32_t *vec_idx)
 }
 
 /*
+ * This function works for scatter-gather RX.
+ */
+static inline uint32_t __attribute__((always_inline))
+virtio_dev_scatter_rx(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint32_t count)
+{
+	struct vhost_virtqueue *vq;
+	uint32_t pkt_idx = 0, entry_success = 0;
+	uint16_t avail_idx;
+	uint16_t res_base_idx, res_end_idx;
+	uint8_t success = 0;
+
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_scatter_rx()\n",
+		dev->device_fh);
+	if (unlikely(queue_id != VIRTIO_RXQ))
+		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+
+	vq = dev->virtqueue[VIRTIO_RXQ];
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+
+	if (count == 0)
+		return 0;
+
+	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+		uint32_t secure_len = 0;
+		uint32_t vec_idx = 0;
+		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
+
+		do {
+			/*
+			 * As many data cores may want access to available
+			 * buffers, they need to be reserved.
+			 */
+			res_base_idx = vq->last_used_idx_res;
+			avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+
+			if (unlikely(res_base_idx == avail_idx)) {
+				LOG_DEBUG(VHOST_DATA,
+					"(%"PRIu64") Failed "
+					"to get enough desc from "
+					"vring\n",
+					dev->device_fh);
+				return pkt_idx;
+			} else {
+				uint16_t wrapped_idx =
+					(res_base_idx) & (vq->size - 1);
+				uint32_t idx = vq->avail->ring[wrapped_idx];
+
+				update_secure_len(vq, idx, &secure_len);
+			}
+
+			if (pkt_len > secure_len) {
+				LOG_DEBUG(VHOST_DATA,
+					"(%"PRIu64") Failed "
+					"to get enough desc from "
+					"vring\n",
+					dev->device_fh);
+				return pkt_idx;
+			}
+
+			/* vq->last_used_idx_res is atomically updated. */
+			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
+							res_base_idx,
+							res_base_idx + 1);
+		} while (success == 0);
+
+		fill_buf_vec(vq, res_base_idx, &vec_idx);
+
+		res_end_idx = res_base_idx + 1;
+
+		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
+			res_end_idx, pkts[pkt_idx]);
+
+		rte_compiler_barrier();
+
+		/*
+		 * Wait until it's our turn to add our buffer
+		 * to the used ring.
+		 */
+		while (unlikely(vq->last_used_idx != res_base_idx))
+			rte_pause();
+
+		*(volatile uint16_t *)&vq->used->idx += entry_success;
+		vq->last_used_idx = res_end_idx;
+
+		/* flush used->idx update before we read avail->flags. */
+		rte_mb();
+
+		/* Kick the guest if necessary. */
+		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+			eventfd_write((int)vq->callfd, 1);
+	}
+
+	return count;
+}
+
+/*
  * This function works for mergeable RX.
  */
 static inline uint32_t __attribute__((always_inline))
@@ -545,12 +643,28 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	return count;
 }
 
+/*
+ * Return 1 if any mbuf is scattered, otherwise return 0.
+ */
+static inline uint32_t __attribute__((always_inline))
+check_scatter(struct rte_mbuf **pkts, uint16_t count)
+{
+	uint32_t i;
+	for (i = 0; i < count; i++) {
+		if (pkts[i]->next != NULL)
+			return 1;
+	}
+	return 0;
+}
+
 uint16_t
 rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint16_t count)
 {
 	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
 		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
+	else if (unlikely(check_scatter(pkts, count) == 1))
+		return virtio_dev_scatter_rx(dev, queue_id, pkts, count);
 	else
 		return virtio_dev_rx(dev, queue_id, pkts, count);
 }
-- 
1.8.4.2


* Re: [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-05-31  5:03     ` Xie, Huawei
  2015-05-31 13:20       ` Ouyang, Changchun
  2015-05-31  8:40     ` Xie, Huawei
  1 sibling, 1 reply; 52+ messages in thread
From: Xie, Huawei @ 2015-05-31  5:03 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> Vring enqueue need consider the 2 cases:
>  1. Vring descriptors chained together, the first one is for virtio header, the rest are for real
>     data, virtio driver in Linux usually use this scheme;
>  2. Only one descriptor, virtio header and real data share one single descriptor, virtio-net pmd use
>     such scheme;
For the commit message, :), actually we should consider the desc chain
as a logically contiguous memory space, so there is also a case like:
desc 1: virtio header and data; the descs that follow: data only.
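
To sketch that view with simplified types (addr is assumed to be already translated to a host pointer, which the real code does via gpa_to_vva; this is not a proposed patch): if data copying simply continues across descriptor boundaries starting from a byte offset, the layout above (header plus data in desc 1, data only in the following descs) needs no special case.

#include <stdint.h>
#include <string.h>

#define SKETCH_DESC_F_NEXT 0x1    /* stands in for VRING_DESC_F_NEXT */

struct sketch_desc {              /* simplified stand-in for a vring descriptor */
	uint64_t addr;            /* assumed already translated to a host virtual address */
	uint32_t len;
	uint16_t flags;
	uint16_t next;
};

/*
 * Copy 'len' bytes of packet data into the chain starting 'skip' bytes in
 * (e.g. skip = vhost_hlen after the header was written), crossing descriptor
 * boundaries as needed. Returns 0 on success, -1 if the chain is too short.
 * A real implementation would also guard against malformed, looping chains.
 */
static int sketch_copy_into_chain(struct sketch_desc *descs, uint16_t head,
				  uint32_t skip, const char *src, uint32_t len)
{
	struct sketch_desc *d = &descs[head];
	uint32_t copied = 0;

	/* Advance until the starting offset falls inside the current descriptor. */
	while (skip >= d->len) {
		if (!(d->flags & SKETCH_DESC_F_NEXT))
			return -1;
		skip -= d->len;
		d = &descs[d->next];
	}

	while (copied < len) {
		uint32_t room = d->len - skip;
		uint32_t chunk = room < len - copied ? room : len - copied;

		memcpy((char *)(uintptr_t)d->addr + skip, src + copied, chunk);
		copied += chunk;
		skip = 0;

		if (copied < len) {
			if (!(d->flags & SKETCH_DESC_F_NEXT))
				return -1;    /* not enough room in the guest buffers */
			d = &descs[d->next];
		}
	}
	return 0;
}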

> So does vring dequeue, it should not assume vring descriptor is chained or not chained, virtio in
> different Linux version has different behavior, e.g. fedora 20 use chained vring descriptor, while
> fedora 21 use one single vring descriptor for tx.
This behavior could be configured. Besides, it is not bound to the
distribution but to the virtio-net driver.
The key thing is that we should consider the generic case, rather than
fitting the requirements of the existing virtio-net implementations, so I
suggest removing the above message.
>
> Changes in v2
>   - drop the uncompleted packet
>   - refine code logic
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 65 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 50 insertions(+), 15 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 4809d32..06ae2df 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0;
> +		uint32_t data_len, len_to_cpy;
> +		uint8_t hdr = 0, uncompleted_pkt = 0;
> +
>  		/* Get descriptor from available ring */
>  		desc = &vq->desc[head[packet_success]];
>  
> @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  		/* Copy virtio_hdr to packet and increment buffer address */
>  		buff_hdr_addr = buff_addr;
> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>  
>  		/*
>  		 * If the descriptors are chained the header and data are
> @@ -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  			desc = &vq->desc[desc->next];
>  			/* Buffer address translation. */
>  			buff_addr = gpa_to_vva(dev, desc->addr);
I am wondering whether there is a possibility that [GPA, GPA+desc->len]
could cross multiple memory regions.
I don't expect it to be fixed in this patch, :).
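
For context, a sketch of what handling that would need (the region table is a simplified stand-in for the vhost memory-region structures, not the DPDK API): a translation that also reports how many bytes remain contiguous in the matched region, so a copy can be split when [GPA, GPA+desc->len] spans regions.

#include <stdint.h>

struct sketch_region {
	uint64_t gpa_start;   /* guest physical start of the region */
	uint64_t size;
	uint64_t host_va;     /* where the region is mapped in the vhost process */
};

/*
 * Translate a guest physical address and report how many bytes remain
 * contiguous within the matched region. Returns 0 if the address is not
 * covered by any region. Illustration only.
 */
static uint64_t sketch_gpa_to_vva(const struct sketch_region *regs, int nregs,
				  uint64_t gpa, uint64_t *contig_len)
{
	for (int i = 0; i < nregs; i++) {
		if (gpa >= regs[i].gpa_start &&
		    gpa < regs[i].gpa_start + regs[i].size) {
			*contig_len = regs[i].gpa_start + regs[i].size - gpa;
			return regs[i].host_va + (gpa - regs[i].gpa_start);
		}
	}
	return 0;
}

/* A caller would then clamp each copy to the smaller of desc->len and
 * contig_len, and re-translate at gpa + contig_len when a descriptor
 * spans two regions. */
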
> -			desc->len = rte_pktmbuf_data_len(buff);
>  		} else {
>  			buff_addr += vq->vhost_hlen;
> -			desc->len = packet_len;
> +			hdr = 1;
>  		}
>  
> +		data_len = rte_pktmbuf_data_len(buff);
> +		len_to_cpy = RTE_MIN(data_len,
> +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> +		while (len_to_cpy > 0) {
> +			/* Copy mbuf data to buffer */
> +			rte_memcpy((void *)(uintptr_t)buff_addr,
> +				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> +				len_to_cpy);
> +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> +				len_to_cpy, 0);
> +
> +			offset += len_to_cpy;
> +
> +			if (offset == data_len)
> +				break;
I don't understand this part. If offset reaches the end of the first segment,
why don't we continue copying from the next segment?

> +
> +			if (desc->flags & VRING_DESC_F_NEXT) {
> +				desc = &vq->desc[desc->next];
> +				buff_addr = gpa_to_vva(dev, desc->addr);
> +				len_to_cpy = RTE_MIN(data_len - offset, desc->len);
> +			} else {
> +				/* Room in vring buffer is not enough */
> +				uncompleted_pkt = 1;
> +				break;
> +			}
> +		};
> +
>  		/* Update used ring with desc information */
>  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>  							head[packet_success];
> -		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>  
> -		/* Copy mbuf data to buffer */
> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> -		rte_memcpy((void *)(uintptr_t)buff_addr,
> -			rte_pktmbuf_mtod(buff, const void *),
> -			rte_pktmbuf_data_len(buff));
> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -			rte_pktmbuf_data_len(buff), 0);
> +		/* Drop the packet if it is uncompleted */
> +		if (unlikely(uncompleted_pkt == 1))
> +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
> +		else
> +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +							offset + vq->vhost_hlen;
>  
>  		res_cur_idx++;
>  		packet_success++;
>  
> +		if (unlikely(uncompleted_pkt == 1))
> +			continue;
> +
>  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>  			(const void *)&virtio_hdr, vq->vhost_hlen);
>  
> @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		desc = &vq->desc[head[entry_success]];
>  
>  		/* Discard first buffer as it is the virtio header */
> -		desc = &vq->desc[desc->next];
> +		if (desc->flags & VRING_DESC_F_NEXT) {
> +			desc = &vq->desc[desc->next];
> +			vb_offset = 0;
> +			vb_avail = desc->len;
> +		} else {
> +			vb_offset = vq->vhost_hlen;
> +			vb_avail = desc->len - vb_offset;
> +		}
>  
>  		/* Buffer address translation. */
>  		vb_addr = gpa_to_vva(dev, desc->addr);
> @@ -608,8 +645,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		vq->used->ring[used_idx].id = head[entry_success];
>  		vq->used->ring[used_idx].len = 0;
>  
> -		vb_offset = 0;
> -		vb_avail = desc->len;
>  		/* Allocate an mbuf and populate the structure. */
>  		m = rte_pktmbuf_alloc(mbuf_pool);
>  		if (unlikely(m == NULL)) {



* Re: [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-05-31  5:03     ` Xie, Huawei
@ 2015-05-31  8:40     ` Xie, Huawei
  2015-05-31 12:59       ` Ouyang, Changchun
  2015-05-31 13:33       ` Ouyang, Changchun
  1 sibling, 2 replies; 52+ messages in thread
From: Xie, Huawei @ 2015-05-31  8:40 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> Vring enqueue need consider the 2 cases:
>  1. Vring descriptors chained together, the first one is for virtio header, the rest are for real
>     data, virtio driver in Linux usually use this scheme;
>  2. Only one descriptor, virtio header and real data share one single descriptor, virtio-net pmd use
>     such scheme;
>
> So does vring dequeue, it should not assume vring descriptor is chained or not chained, virtio in
> different Linux version has different behavior, e.g. fedora 20 use chained vring descriptor, while
> fedora 21 use one single vring descriptor for tx.
>
> Changes in v2
>   - drop the uncompleted packet
>   - refine code logic
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 65 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 50 insertions(+), 15 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 4809d32..06ae2df 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0;
> +		uint32_t data_len, len_to_cpy;
> +		uint8_t hdr = 0, uncompleted_pkt = 0;
> +
>  		/* Get descriptor from available ring */
>  		desc = &vq->desc[head[packet_success]];
>  
> @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  		/* Copy virtio_hdr to packet and increment buffer address */
>  		buff_hdr_addr = buff_addr;
> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>  
>  		/*
>  		 * If the descriptors are chained the header and data are
> @@ -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  			desc = &vq->desc[desc->next];
>  			/* Buffer address translation. */
>  			buff_addr = gpa_to_vva(dev, desc->addr);
> -			desc->len = rte_pktmbuf_data_len(buff);
Did we get confirmation from the virtio spec that it is OK to only update used->len?
>  		} else {
>  			buff_addr += vq->vhost_hlen;
> -			desc->len = packet_len;
> +			hdr = 1;
>  		}
>  
> +		data_len = rte_pktmbuf_data_len(buff);
> +		len_to_cpy = RTE_MIN(data_len,
> +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> +		while (len_to_cpy > 0) {
> +			/* Copy mbuf data to buffer */
> +			rte_memcpy((void *)(uintptr_t)buff_addr,
> +				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> +				len_to_cpy);
> +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> +				len_to_cpy, 0);
> +
> +			offset += len_to_cpy;
> +
> +			if (offset == data_len)
> +				break;
OK, I see the scatter-gather case handling is in patch 5.
> +
> +			if (desc->flags & VRING_DESC_F_NEXT) {
> +				desc = &vq->desc[desc->next];
> +				buff_addr = gpa_to_vva(dev, desc->addr);
> +				len_to_cpy = RTE_MIN(data_len - offset, desc->len);
> +			} else {
> +				/* Room in vring buffer is not enough */
> +				uncompleted_pkt = 1;
> +				break;
> +			}
> +		};
> +
>  		/* Update used ring with desc information */
>  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>  							head[packet_success];
> -		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>  
> -		/* Copy mbuf data to buffer */
> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> -		rte_memcpy((void *)(uintptr_t)buff_addr,
> -			rte_pktmbuf_mtod(buff, const void *),
> -			rte_pktmbuf_data_len(buff));
> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -			rte_pktmbuf_data_len(buff), 0);
> +		/* Drop the packet if it is uncompleted */
> +		if (unlikely(uncompleted_pkt == 1))
> +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
Here things become complicated with the previous lockless reserve.
What is the consequence when the guest sees zero in used->len? At least, did
we check against the virtio-net implementation?

> +		else
> +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +							offset + vq->vhost_hlen;
Two questions here:
1. Should the virtio header len be added here?
2. Why not use packet_len rather than offset?
>  
>  		res_cur_idx++;
>  		packet_success++;
>  
> +		if (unlikely(uncompleted_pkt == 1))
> +			continue;
> +
>  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>  			(const void *)&virtio_hdr, vq->vhost_hlen);
>  
> @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		desc = &vq->desc[head[entry_success]];
>  
>  		/* Discard first buffer as it is the virtio header */
> -		desc = &vq->desc[desc->next];
> +		if (desc->flags & VRING_DESC_F_NEXT) {
> +			desc = &vq->desc[desc->next];
> +			vb_offset = 0;
> +			vb_avail = desc->len;
> +		} else {
> +			vb_offset = vq->vhost_hlen;
> +			vb_avail = desc->len - vb_offset;
> +		}
>  
>  		/* Buffer address translation. */
>  		vb_addr = gpa_to_vva(dev, desc->addr);
> @@ -608,8 +645,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		vq->used->ring[used_idx].id = head[entry_success];
>  		vq->used->ring[used_idx].len = 0;
>  
> -		vb_offset = 0;
> -		vb_avail = desc->len;
>  		/* Allocate an mbuf and populate the structure. */
>  		m = rte_pktmbuf_alloc(mbuf_pool);
>  		if (unlikely(m == NULL)) {



* Re: [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring Ouyang Changchun
@ 2015-05-31  9:10     ` Xie, Huawei
  2015-05-31 13:07       ` Ouyang, Changchun
  0 siblings, 1 reply; 52+ messages in thread
From: Xie, Huawei @ 2015-05-31  9:10 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

virtio_dev_rx & scatter_rx & merge-able rx should be merged and the code
could be much simpler, unless there is special performance consideration.


On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> Add support copying scattered mbuf to vring which is done by dev_scatter_rx,
> and check the 'next' pointer in mbuf on the fly to select suitable function to rx packets.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 116 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 115 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index bb56ae1..3086bb4 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -46,7 +46,8 @@
>   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>   * be received from the physical port or from another virtio device. A packet
>   * count is returned to indicate the number of packets that are succesfully
> - * added to the RX queue. This function works when mergeable is disabled.
> + * added to the RX queue. This function works when mergeable is disabled and
> + * the mbuf is not scattered.
>   */
>  static inline uint32_t __attribute__((always_inline))
>  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> @@ -447,6 +448,103 @@ fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id, uint32_t *vec_idx)
>  }
>  
>  /*
> + * This function works for scatter-gather RX.
> + */
> +static inline uint32_t __attribute__((always_inline))
> +virtio_dev_scatter_rx(struct virtio_net *dev, uint16_t queue_id,
> +	struct rte_mbuf **pkts, uint32_t count)
> +{
> +	struct vhost_virtqueue *vq;
> +	uint32_t pkt_idx = 0, entry_success = 0;
> +	uint16_t avail_idx;
> +	uint16_t res_base_idx, res_end_idx;
> +	uint8_t success = 0;
> +
> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_scatter_rx()\n",
> +		dev->device_fh);
use __func__
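A minimal sketch of the suggested change (keeping the existing format string otherwise):

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n",
		dev->device_fh, __func__);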
> +	if (unlikely(queue_id != VIRTIO_RXQ))
> +		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> +
> +	vq = dev->virtqueue[VIRTIO_RXQ];
> +	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> +
> +	if (count == 0)
> +		return 0;
> +
> +	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> +		uint32_t secure_len = 0;
> +		uint32_t vec_idx = 0;
> +		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
> +
> +		do {
> +			/*
> +			 * As many data cores may want access to available
> +			 * buffers, they need to be reserved.
> +			 */
> +			res_base_idx = vq->last_used_idx_res;
> +			avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> +
> +			if (unlikely(res_base_idx == avail_idx)) {
> +				LOG_DEBUG(VHOST_DATA,
> +					"(%"PRIu64") Failed "
> +					"to get enough desc from "
> +					"vring\n",
> +					dev->device_fh);
> +				return pkt_idx;
> +			} else {
> +				uint16_t wrapped_idx =
> +					(res_base_idx) & (vq->size - 1);
> +				uint32_t idx = vq->avail->ring[wrapped_idx];
> +
> +				update_secure_len(vq, idx, &secure_len);
> +			}
> +
> +			if (pkt_len > secure_len) {
> +				LOG_DEBUG(VHOST_DATA,
> +					"(%"PRIu64") Failed "
> +					"to get enough desc from "
> +					"vring\n",
> +					dev->device_fh);
> +				return pkt_idx;
> +			}
The behavior for virtio_dev_rx and virtio_dev_merge_rx is totally
different. I think they should behave in the same way.
virtio_dev_rx updates used->len to zero while this one returns immediately.

Besides, with this implementation, if the caller retransmits the
mbuf (whose pkt_len is larger than secure_len), it will enter an endless loop.
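As an illustration (the retry loop below is hypothetical, not from this patch set; dev, pkts and nb_pkts are illustrative names), a caller that keeps resubmitting the unsent mbufs would then spin forever on the oversized one:

	uint16_t sent = 0;
	/* Hypothetical caller-side retry loop: if one mbuf can never fit into
	 * the guest buffers, enqueue keeps returning the same count and this
	 * loop never terminates. */
	while (sent < nb_pkts)
		sent += rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
						&pkts[sent], nb_pkts - sent);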

> +
> +			/* vq->last_used_idx_res is atomically updated. */
> +			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
> +							res_base_idx,
> +							res_base_idx + 1);
> +		} while (success == 0);

Here the behavior becomes different again in reserving vring entries.

> +
> +		fill_buf_vec(vq, res_base_idx, &vec_idx);
> +
> +		res_end_idx = res_base_idx + 1;
> +
> +		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
> +			res_end_idx, pkts[pkt_idx]);
> +
> +		rte_compiler_barrier();
> +
> +		/*
> +		 * Wait until it's our turn to add our buffer
> +		 * to the used ring.
> +		 */
> +		while (unlikely(vq->last_used_idx != res_base_idx))
> +			rte_pause();
> +
> +		*(volatile uint16_t *)&vq->used->idx += entry_success;
> +		vq->last_used_idx = res_end_idx;
> +
> +		/* flush used->idx update before we read avail->flags. */
> +		rte_mb();
> +
> +		/* Kick the guest if necessary. */
> +		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
> +			eventfd_write((int)vq->callfd, 1);
> +	}
> +
> +	return count;
> +}
> +
> +/*
>   * This function works for mergeable RX.
>   */
>  static inline uint32_t __attribute__((always_inline))
> @@ -545,12 +643,28 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>  	return count;
>  }
>  
> +/*
> + * Return 1 if any mbuf is scattered, otherwise return 0.
> + */
> +static inline uint32_t __attribute__((always_inline))
> +check_scatter(struct rte_mbuf **pkts, uint16_t count)
> +{
> +	uint32_t i;
> +	for (i = 0; i < count; i++) {
> +		if (pkts[i]->next != NULL)
> +			return 1;
> +	}
> +	return 0;
> +}
> +
>  uint16_t
>  rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>  	struct rte_mbuf **pkts, uint16_t count)
>  {
>  	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
>  		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
> +	else if (unlikely(check_scatter(pkts, count) == 1))
> +		return virtio_dev_scatter_rx(dev, queue_id, pkts, count);
>  	else
>  		return virtio_dev_rx(dev, queue_id, pkts, count);
>  }


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-31  8:40     ` Xie, Huawei
@ 2015-05-31 12:59       ` Ouyang, Changchun
  2015-05-31 13:22         ` Ouyang, Changchun
  2015-05-31 13:33       ` Ouyang, Changchun
  1 sibling, 1 reply; 52+ messages in thread
From: Ouyang, Changchun @ 2015-05-31 12:59 UTC (permalink / raw)
  To: Xie, Huawei, dev



> -----Original Message-----
> From: Xie, Huawei
> Sent: Sunday, May 31, 2015 4:41 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: Re: [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle
> chained vring descriptors
> 
> On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> > Vring enqueue need consider the 2 cases:
> >  1. Vring descriptors chained together, the first one is for virtio header, the
> rest are for real
> >     data, virtio driver in Linux usually use this scheme;  2. Only one
> > descriptor, virtio header and real data share one single descriptor, virtio-
> net pmd use
> >     such scheme;
> >
> > So does vring dequeue, it should not assume vring descriptor is
> > chained or not chained, virtio in different Linux version has
> > different behavior, e.g. fedora 20 use chained vring descriptor, while
> fedora 21 use one single vring descriptor for tx.
> >
> > Changes in v2
> >   - drop the uncompleted packet
> >   - refine code logic
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 65
> > +++++++++++++++++++++++++++++++++----------
> >  1 file changed, 50 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 4809d32..06ae2df 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0;
> > +		uint32_t data_len, len_to_cpy;
> > +		uint8_t hdr = 0, uncompleted_pkt = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are @@
> > -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > -			desc->len = rte_pktmbuf_data_len(buff);
> Did we get confirmation from the virtio spec that it is OK to only update used->len?

The virtio spec doesn't require vhost to update desc->len.
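In other words, vhost does not need to write the descriptor table at all; per packet it only needs to fill in the used ring entry, roughly (sketch; used_idx, head_desc_idx and bytes_written are illustrative names):

	/* Sketch: the guest owns the descriptor table; vhost only fills in the
	 * used ring entry for the chain head with the bytes it actually wrote. */
	vq->used->ring[used_idx].id  = head_desc_idx;
	vq->used->ring[used_idx].len = bytes_written;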


> >  		} else {
> >  			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			hdr = 1;
> >  		}
> >
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len,
> > +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> > +		while (len_to_cpy > 0) {
> > +			/* Copy mbuf data to buffer */
> > +			rte_memcpy((void *)(uintptr_t)buff_addr,
> > +				(const void *)(rte_pktmbuf_mtod(buff,
> const char *) + offset),
> > +				len_to_cpy);
> > +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > +				len_to_cpy, 0);
> > +
> > +			offset += len_to_cpy;
> > +
> > +			if (offset == data_len)
> > +				break;
> Ok, I see the scatter-gather case handling is in patch 5.
> > +
> > +			if (desc->flags & VRING_DESC_F_NEXT) {
> > +				desc = &vq->desc[desc->next];
> > +				buff_addr = gpa_to_vva(dev, desc->addr);
> > +				len_to_cpy = RTE_MIN(data_len - offset,
> desc->len);
> > +			} else {
> > +				/* Room in vring buffer is not enough */
> > +				uncompleted_pkt = 1;
> > +				break;
> > +			}
> > +		};
> > +
> >  		/* Update used ring with desc information */
> >  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> >
> 	head[packet_success];
> > -		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> packet_len;
> >
> > -		/* Copy mbuf data to buffer */
> > -		/* FIXME for sg mbuf and the case that desc couldn't hold the
> mbuf data */
> > -		rte_memcpy((void *)(uintptr_t)buff_addr,
> > -			rte_pktmbuf_mtod(buff, const void *),
> > -			rte_pktmbuf_data_len(buff));
> > -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > -			rte_pktmbuf_data_len(buff), 0);
> > +		/* Drop the packet if it is uncompleted */
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
> Here things become complicated with the previous lockless reserve.

Why it become complicated? Len = 0 means it contain any meaningful data in the buffer.

> What is the consequence when guest sees zero in used->len? At least, do we
> check with virtio-net implementation?

> 
> > +		else
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > +							offset + vq-
> >vhost_hlen;
> Two questions here,
> 1.  add virtio header len?
> 2.  Why not use packet_len rather than offset?
> >
> >  		res_cur_idx++;
> >  		packet_success++;
> >
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			continue;
> > +
> >  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> >  			(const void *)&virtio_hdr, vq->vhost_hlen);
> >
> > @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev,
> uint16_t queue_id,
> >  		desc = &vq->desc[head[entry_success]];
> >
> >  		/* Discard first buffer as it is the virtio header */
> > -		desc = &vq->desc[desc->next];
> > +		if (desc->flags & VRING_DESC_F_NEXT) {
> > +			desc = &vq->desc[desc->next];
> > +			vb_offset = 0;
> > +			vb_avail = desc->len;
> > +		} else {
> > +			vb_offset = vq->vhost_hlen;
> > +			vb_avail = desc->len - vb_offset;
> > +		}
> >
> >  		/* Buffer address translation. */
> >  		vb_addr = gpa_to_vva(dev, desc->addr); @@ -608,8 +645,6
> @@
> > rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  		vq->used->ring[used_idx].id = head[entry_success];
> >  		vq->used->ring[used_idx].len = 0;
> >
> > -		vb_offset = 0;
> > -		vb_avail = desc->len;
> >  		/* Allocate an mbuf and populate the structure. */
> >  		m = rte_pktmbuf_alloc(mbuf_pool);
> >  		if (unlikely(m == NULL)) {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring
  2015-05-31  9:10     ` Xie, Huawei
@ 2015-05-31 13:07       ` Ouyang, Changchun
  0 siblings, 0 replies; 52+ messages in thread
From: Ouyang, Changchun @ 2015-05-31 13:07 UTC (permalink / raw)
  To: Xie, Huawei, dev



> -----Original Message-----
> From: Xie, Huawei
> Sent: Sunday, May 31, 2015 5:11 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: Re: [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf
> to vring
> 
> virtio_dev_rx & scatter_rx & merge-able rx should be merged and the code
> could be much simpler, unless there is special performance consideration.
>
Then, any specific suggestion on how to merge them?
I do consider the performance influence here, so I think it deserves to have 3 implementations for the different cases.
 
> 
> On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> > Add support copying scattered mbuf to vring which is done by
> > dev_scatter_rx, and check the 'next' pointer in mbuf on the fly to select
> suitable function to rx packets.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 116
> > +++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 115 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index bb56ae1..3086bb4 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -46,7 +46,8 @@
> >   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
> >   * be received from the physical port or from another virtio device. A
> packet
> >   * count is returned to indicate the number of packets that are
> > succesfully
> > - * added to the RX queue. This function works when mergeable is disabled.
> > + * added to the RX queue. This function works when mergeable is
> > + disabled and
> > + * the mbuf is not scattered.
> >   */
> >  static inline uint32_t __attribute__((always_inline))
> > virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, @@ -447,6
> > +448,103 @@ fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id,
> > uint32_t *vec_idx)  }
> >
> >  /*
> > + * This function works for scatter-gather RX.
> > + */
> > +static inline uint32_t __attribute__((always_inline))
> > +virtio_dev_scatter_rx(struct virtio_net *dev, uint16_t queue_id,
> > +	struct rte_mbuf **pkts, uint32_t count) {
> > +	struct vhost_virtqueue *vq;
> > +	uint32_t pkt_idx = 0, entry_success = 0;
> > +	uint16_t avail_idx;
> > +	uint16_t res_base_idx, res_end_idx;
> > +	uint8_t success = 0;
> > +
> > +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_scatter_rx()\n",
> > +		dev->device_fh);
> use __func__
> > +	if (unlikely(queue_id != VIRTIO_RXQ))
> > +		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this
> version.\n");
> > +
> > +	vq = dev->virtqueue[VIRTIO_RXQ];
> > +	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> > +
> > +	if (count == 0)
> > +		return 0;
> > +
> > +	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> > +		uint32_t secure_len = 0;
> > +		uint32_t vec_idx = 0;
> > +		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
> > +
> > +		do {
> > +			/*
> > +			 * As many data cores may want access to available
> > +			 * buffers, they need to be reserved.
> > +			 */
> > +			res_base_idx = vq->last_used_idx_res;
> > +			avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> > +
> > +			if (unlikely(res_base_idx == avail_idx)) {
> > +				LOG_DEBUG(VHOST_DATA,
> > +					"(%"PRIu64") Failed "
> > +					"to get enough desc from "
> > +					"vring\n",
> > +					dev->device_fh);
> > +				return pkt_idx;
> > +			} else {
> > +				uint16_t wrapped_idx =
> > +					(res_base_idx) & (vq->size - 1);
> > +				uint32_t idx = vq->avail->ring[wrapped_idx];
> > +
> > +				update_secure_len(vq, idx, &secure_len);
> > +			}
> > +
> > +			if (pkt_len > secure_len) {
> > +				LOG_DEBUG(VHOST_DATA,
> > +					"(%"PRIu64") Failed "
> > +					"to get enough desc from "
> > +					"vring\n",
> > +					dev->device_fh);
> > +				return pkt_idx;
> > +			}
> The behavior for virtio_dev_rx and virtio_dev_merge_rx is totally different. I
> think they should behave in the same way.
Why do they have to work in the same way?

> virtio_dev_rx updates used->len to zero while this one returns immediately.
> 
Yes, for the uncompleted packet case, I think it comes from your comment about dropping the packet if the room is not
big enough to contain the whole packet.

> Besides, with this implementation, if the caller retransmits the mbuf (whose
> pkt_len is larger than secure_len), it will enter an endless loop.
Why would the caller retransmit the mbuf? I think this is the caller's bug, and the endless loop just catches that issue in the caller.

> 
> > +
> > +			/* vq->last_used_idx_res is atomically updated. */
> > +			success = rte_atomic16_cmpset(&vq-
> >last_used_idx_res,
> > +							res_base_idx,
> > +							res_base_idx + 1);
> > +		} while (success == 0);
> 
> Here the behavior becomes different again in reserving vring entries.
> 
> > +
> > +		fill_buf_vec(vq, res_base_idx, &vec_idx);
> > +
> > +		res_end_idx = res_base_idx + 1;
> > +
> > +		entry_success = copy_from_mbuf_to_vring(dev,
> res_base_idx,
> > +			res_end_idx, pkts[pkt_idx]);
> > +
> > +		rte_compiler_barrier();
> > +
> > +		/*
> > +		 * Wait until it's our turn to add our buffer
> > +		 * to the used ring.
> > +		 */
> > +		while (unlikely(vq->last_used_idx != res_base_idx))
> > +			rte_pause();
> > +
> > +		*(volatile uint16_t *)&vq->used->idx += entry_success;
> > +		vq->last_used_idx = res_end_idx;
> > +
> > +		/* flush used->idx update before we read avail->flags. */
> > +		rte_mb();
> > +
> > +		/* Kick the guest if necessary. */
> > +		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
> > +			eventfd_write((int)vq->callfd, 1);
> > +	}
> > +
> > +	return count;
> > +}
> > +
> > +/*
> >   * This function works for mergeable RX.
> >   */
> >  static inline uint32_t __attribute__((always_inline)) @@ -545,12
> > +643,28 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	return count;
> >  }
> >
> > +/*
> > + * Return 1 if any mbuf is scattered, otherwise return 0.
> > + */
> > +static inline uint32_t __attribute__((always_inline))
> > +check_scatter(struct rte_mbuf **pkts, uint16_t count) {
> > +	uint32_t i;
> > +	for (i = 0; i < count; i++) {
> > +		if (pkts[i]->next != NULL)
> > +			return 1;
> > +	}
> > +	return 0;
> > +}
> > +
> >  uint16_t
> >  rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  	struct rte_mbuf **pkts, uint16_t count)  {
> >  	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
> >  		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
> > +	else if (unlikely(check_scatter(pkts, count) == 1))
> > +		return virtio_dev_scatter_rx(dev, queue_id, pkts, count);
> >  	else
> >  		return virtio_dev_rx(dev, queue_id, pkts, count);  }

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-31  5:03     ` Xie, Huawei
@ 2015-05-31 13:20       ` Ouyang, Changchun
  0 siblings, 0 replies; 52+ messages in thread
From: Ouyang, Changchun @ 2015-05-31 13:20 UTC (permalink / raw)
  To: Xie, Huawei, dev



> -----Original Message-----
> From: Xie, Huawei
> Sent: Sunday, May 31, 2015 1:04 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: Re: [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle
> chained vring descriptors
> 
> On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> > Vring enqueue need consider the 2 cases:
> >  1. Vring descriptors chained together, the first one is for virtio header, the
> rest are for real
> >     data, virtio driver in Linux usually use this scheme;  2. Only one
> > descriptor, virtio header and real data share one single descriptor, virtio-
> net pmd use
> >     such scheme;
> For the commit message, :), actually we should consider the desc chain as
> logically continuous memory space, so there is also the case like desc 1: virtio
> header and data; descs followed: data only.
> 

Ok, makes sense, I will update the description a bit in the next version.
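For reference, the descriptor layouts under discussion could be summarized as (illustrative sketch):

	/*
	 * Guest RX descriptor chain layouts vhost has to handle; the chain is
	 * one logically continuous buffer:
	 *   (a) desc0: virtio header only;          desc1..N: packet data
	 *   (b) desc0: virtio header + all data     (single descriptor)
	 *   (c) desc0: virtio header + some data;   desc1..N: rest of the data
	 */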

> > So does vring dequeue, it should not assume vring descriptor is
> > chained or not chained, virtio in different Linux version has
> > different behavior, e.g. fedora 20 use chained vring descriptor, while
> fedora 21 use one single vring descriptor for tx.
> This behavior could be configured. Besides, it is not bound to the distribution
> but to the virtio-net driver.
> The key thing is we should consider the generic case, rather than fitting the
> requirement of the existing virtio-net implementation, so I suggest removing the
> above message.
That also makes sense, I can remove it in the next version.
> >
> > Changes in v2
> >   - drop the uncompleted packet
> >   - refine code logic
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 65
> > +++++++++++++++++++++++++++++++++----------
> >  1 file changed, 50 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 4809d32..06ae2df 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0;
> > +		uint32_t data_len, len_to_cpy;
> > +		uint8_t hdr = 0, uncompleted_pkt = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are @@
> > -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> I am wondering if there is the possibility the [GPA, GPA+desc->len] could
> cross multiple memory regions.
> Don't expect to fix in this patch, :).
> > -			desc->len = rte_pktmbuf_data_len(buff);
> >  		} else {
> >  			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			hdr = 1;
> >  		}
> >
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len,
> > +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> > +		while (len_to_cpy > 0) {
> > +			/* Copy mbuf data to buffer */
> > +			rte_memcpy((void *)(uintptr_t)buff_addr,
> > +				(const void *)(rte_pktmbuf_mtod(buff,
> const char *) + offset),
> > +				len_to_cpy);
> > +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > +				len_to_cpy, 0);
> > +
> > +			offset += len_to_cpy;
> > +
> > +			if (offset == data_len)
> > +				break;
> I don't understand here. If offset reaches the end of the first segment, why
> don't we continue to copy from the next segment?
data_len is the total length of the whole packet rather than the length of just one segment.
Equal means the whole packet has been copied into the vring buffer, so it can break out of this while loop
and continue to handle the next packet.
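For the non-scattered case this function handles, the two lengths coincide (a small sketch for reference):

	data_len = rte_pktmbuf_data_len(buff); /* bytes in this segment          */
	pkt_len  = rte_pktmbuf_pkt_len(buff);  /* bytes in the whole mbuf chain  */
	/* With buff->next == NULL (no scatter-gather), data_len == pkt_len.     */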

> 
> > +
> > +			if (desc->flags & VRING_DESC_F_NEXT) {
> > +				desc = &vq->desc[desc->next];
> > +				buff_addr = gpa_to_vva(dev, desc->addr);
> > +				len_to_cpy = RTE_MIN(data_len - offset,
> desc->len);
> > +			} else {
> > +				/* Room in vring buffer is not enough */
> > +				uncompleted_pkt = 1;
> > +				break;
> > +			}
> > +		};
> > +
> >  		/* Update used ring with desc information */
> >  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> >
> 	head[packet_success];
> > -		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> packet_len;
> >
> > -		/* Copy mbuf data to buffer */
> > -		/* FIXME for sg mbuf and the case that desc couldn't hold the
> mbuf data */
> > -		rte_memcpy((void *)(uintptr_t)buff_addr,
> > -			rte_pktmbuf_mtod(buff, const void *),
> > -			rte_pktmbuf_data_len(buff));
> > -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > -			rte_pktmbuf_data_len(buff), 0);
> > +		/* Drop the packet if it is uncompleted */
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
> > +		else
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > +							offset + vq-
> >vhost_hlen;
> >
> >  		res_cur_idx++;
> >  		packet_success++;
> >
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			continue;
> > +
> >  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> >  			(const void *)&virtio_hdr, vq->vhost_hlen);
> >
> > @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev,
> uint16_t queue_id,
> >  		desc = &vq->desc[head[entry_success]];
> >
> >  		/* Discard first buffer as it is the virtio header */
> > -		desc = &vq->desc[desc->next];
> > +		if (desc->flags & VRING_DESC_F_NEXT) {
> > +			desc = &vq->desc[desc->next];
> > +			vb_offset = 0;
> > +			vb_avail = desc->len;
> > +		} else {
> > +			vb_offset = vq->vhost_hlen;
> > +			vb_avail = desc->len - vb_offset;
> > +		}
> >
> >  		/* Buffer address translation. */
> >  		vb_addr = gpa_to_vva(dev, desc->addr); @@ -608,8 +645,6
> @@
> > rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  		vq->used->ring[used_idx].id = head[entry_success];
> >  		vq->used->ring[used_idx].len = 0;
> >
> > -		vb_offset = 0;
> > -		vb_avail = desc->len;
> >  		/* Allocate an mbuf and populate the structure. */
> >  		m = rte_pktmbuf_alloc(mbuf_pool);
> >  		if (unlikely(m == NULL)) {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-31 12:59       ` Ouyang, Changchun
@ 2015-05-31 13:22         ` Ouyang, Changchun
  0 siblings, 0 replies; 52+ messages in thread
From: Ouyang, Changchun @ 2015-05-31 13:22 UTC (permalink / raw)
  To: Xie, Huawei, dev



> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Sunday, May 31, 2015 9:00 PM
> To: Xie, Huawei; dev@dpdk.org
> Cc: Cao, Waterman; Ouyang, Changchun
> Subject: RE: [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle
> chained vring descriptors
> 
> 
> 
> > -----Original Message-----
> > From: Xie, Huawei
> > Sent: Sunday, May 31, 2015 4:41 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Cc: Cao, Waterman
> > Subject: Re: [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't
> > handle chained vring descriptors
> >
> > On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> > > Vring enqueue need consider the 2 cases:
> > >  1. Vring descriptors chained together, the first one is for virtio
> > > header, the
> > rest are for real
> > >     data, virtio driver in Linux usually use this scheme;  2. Only
> > > one descriptor, virtio header and real data share one single
> > > descriptor, virtio-
> > net pmd use
> > >     such scheme;
> > >
> > > So does vring dequeue, it should not assume vring descriptor is
> > > chained or not chained, virtio in different Linux version has
> > > different behavior, e.g. fedora 20 use chained vring descriptor,
> > > while
> > fedora 21 use one single vring descriptor for tx.
> > >
> > > Changes in v2
> > >   - drop the uncompleted packet
> > >   - refine code logic
> > >
> > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > ---
> > >  lib/librte_vhost/vhost_rxtx.c | 65
> > > +++++++++++++++++++++++++++++++++----------
> > >  1 file changed, 50 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > > b/lib/librte_vhost/vhost_rxtx.c index 4809d32..06ae2df 100644
> > > --- a/lib/librte_vhost/vhost_rxtx.c
> > > +++ b/lib/librte_vhost/vhost_rxtx.c
> > > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> > >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> > >  	uint64_t buff_addr = 0;
> > >  	uint64_t buff_hdr_addr = 0;
> > > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > > +	uint32_t head[MAX_PKT_BURST];
> > >  	uint32_t head_idx, packet_success = 0;
> > >  	uint16_t avail_idx, res_cur_idx;
> > >  	uint16_t res_base_idx, res_end_idx; @@ -113,6 +113,10 @@
> > > virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> > >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> > >
> > >  	while (res_cur_idx != res_end_idx) {
> > > +		uint32_t offset = 0;
> > > +		uint32_t data_len, len_to_cpy;
> > > +		uint8_t hdr = 0, uncompleted_pkt = 0;
> > > +
> > >  		/* Get descriptor from available ring */
> > >  		desc = &vq->desc[head[packet_success]];
> > >
> > > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > > queue_id,
> > >
> > >  		/* Copy virtio_hdr to packet and increment buffer address */
> > >  		buff_hdr_addr = buff_addr;
> > > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> > >
> > >  		/*
> > >  		 * If the descriptors are chained the header and data are @@
> > > -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> > >  			desc = &vq->desc[desc->next];
> > >  			/* Buffer address translation. */
> > >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > > -			desc->len = rte_pktmbuf_data_len(buff);
> > Did we get confirmation from the virtio spec that it is OK to only update used->len?
> 
> The virtio spec doesn't require vhost to update desc->len.
> 
> 
> > >  		} else {
> > >  			buff_addr += vq->vhost_hlen;
> > > -			desc->len = packet_len;
> > > +			hdr = 1;
> > >  		}
> > >
> > > +		data_len = rte_pktmbuf_data_len(buff);
> > > +		len_to_cpy = RTE_MIN(data_len,
> > > +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> > > +		while (len_to_cpy > 0) {
> > > +			/* Copy mbuf data to buffer */
> > > +			rte_memcpy((void *)(uintptr_t)buff_addr,
> > > +				(const void *)(rte_pktmbuf_mtod(buff,
> > const char *) + offset),
> > > +				len_to_cpy);
> > > +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > > +				len_to_cpy, 0);
> > > +
> > > +			offset += len_to_cpy;
> > > +
> > > +			if (offset == data_len)
> > > +				break;
> > Ok, I see the scatter-gather case handling is in patch 5.
> > > +
> > > +			if (desc->flags & VRING_DESC_F_NEXT) {
> > > +				desc = &vq->desc[desc->next];
> > > +				buff_addr = gpa_to_vva(dev, desc->addr);
> > > +				len_to_cpy = RTE_MIN(data_len - offset,
> > desc->len);
> > > +			} else {
> > > +				/* Room in vring buffer is not enough */
> > > +				uncompleted_pkt = 1;
> > > +				break;
> > > +			}
> > > +		};
> > > +
> > >  		/* Update used ring with desc information */
> > >  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> > >
> > 	head[packet_success];
> > > -		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > packet_len;
> > >
> > > -		/* Copy mbuf data to buffer */
> > > -		/* FIXME for sg mbuf and the case that desc couldn't hold the
> > mbuf data */
> > > -		rte_memcpy((void *)(uintptr_t)buff_addr,
> > > -			rte_pktmbuf_mtod(buff, const void *),
> > > -			rte_pktmbuf_data_len(buff));
> > > -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > > -			rte_pktmbuf_data_len(buff), 0);
> > > +		/* Drop the packet if it is uncompleted */
> > > +		if (unlikely(uncompleted_pkt == 1))
> > > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
> > Here things become complicated with the previous lockless reserve.
> 
> Why it become complicated? Len = 0 means it contain any meaningful data in
> the buffer.
Sorry, typo here: Len = 0 means it doesn't contain any meaningful data in
the buffer.

> 
> > What is the consequence when guest sees zero in used->len? At least,
> > do we check with virtio-net implementation?
> 
> >
> > > +		else
> > > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > > +							offset + vq-
> > >vhost_hlen;
> > Two questions here,
> > 1.  add virtio header len?
> > 2.  Why not use packet_len rather than offset?
> > >
> > >  		res_cur_idx++;
> > >  		packet_success++;
> > >
> > > +		if (unlikely(uncompleted_pkt == 1))
> > > +			continue;
> > > +
> > >  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> > >  			(const void *)&virtio_hdr, vq->vhost_hlen);
> > >
> > > @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net
> *dev,
> > uint16_t queue_id,
> > >  		desc = &vq->desc[head[entry_success]];
> > >
> > >  		/* Discard first buffer as it is the virtio header */
> > > -		desc = &vq->desc[desc->next];
> > > +		if (desc->flags & VRING_DESC_F_NEXT) {
> > > +			desc = &vq->desc[desc->next];
> > > +			vb_offset = 0;
> > > +			vb_avail = desc->len;
> > > +		} else {
> > > +			vb_offset = vq->vhost_hlen;
> > > +			vb_avail = desc->len - vb_offset;
> > > +		}
> > >
> > >  		/* Buffer address translation. */
> > >  		vb_addr = gpa_to_vva(dev, desc->addr); @@ -608,8 +645,6
> > @@
> > > rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> > >  		vq->used->ring[used_idx].id = head[entry_success];
> > >  		vq->used->ring[used_idx].len = 0;
> > >
> > > -		vb_offset = 0;
> > > -		vb_avail = desc->len;
> > >  		/* Allocate an mbuf and populate the structure. */
> > >  		m = rte_pktmbuf_alloc(mbuf_pool);
> > >  		if (unlikely(m == NULL)) {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-05-31  8:40     ` Xie, Huawei
  2015-05-31 12:59       ` Ouyang, Changchun
@ 2015-05-31 13:33       ` Ouyang, Changchun
  1 sibling, 0 replies; 52+ messages in thread
From: Ouyang, Changchun @ 2015-05-31 13:33 UTC (permalink / raw)
  To: Xie, Huawei, dev



> -----Original Message-----
> From: Xie, Huawei
> Sent: Sunday, May 31, 2015 4:41 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: Re: [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle
> chained vring descriptors
> 
> On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> > Vring enqueue need consider the 2 cases:
> >  1. Vring descriptors chained together, the first one is for virtio header, the
> rest are for real
> >     data, virtio driver in Linux usually use this scheme;  2. Only one
> > descriptor, virtio header and real data share one single descriptor, virtio-
> net pmd use
> >     such scheme;
> >
> > So does vring dequeue, it should not assume vring descriptor is
> > chained or not chained, virtio in different Linux version has
> > different behavior, e.g. fedora 20 use chained vring descriptor, while
> fedora 21 use one single vring descriptor for tx.
> >
> > Changes in v2
> >   - drop the uncompleted packet
> >   - refine code logic
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 65
> > +++++++++++++++++++++++++++++++++----------
> >  1 file changed, 50 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 4809d32..06ae2df 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0;
> > +		uint32_t data_len, len_to_cpy;
> > +		uint8_t hdr = 0, uncompleted_pkt = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are @@
> > -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > -			desc->len = rte_pktmbuf_data_len(buff);
> Did we get confirmation from the virtio spec that it is OK to only update used->len?
> >  		} else {
> >  			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			hdr = 1;
> >  		}
> >
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len,
> > +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> > +		while (len_to_cpy > 0) {
> > +			/* Copy mbuf data to buffer */
> > +			rte_memcpy((void *)(uintptr_t)buff_addr,
> > +				(const void *)(rte_pktmbuf_mtod(buff,
> const char *) + offset),
> > +				len_to_cpy);
> > +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > +				len_to_cpy, 0);
> > +
> > +			offset += len_to_cpy;
> > +
> > +			if (offset == data_len)
> > +				break;
> Ok, I see the scatter-gather case handling is in patch 5.
> > +
> > +			if (desc->flags & VRING_DESC_F_NEXT) {
> > +				desc = &vq->desc[desc->next];
> > +				buff_addr = gpa_to_vva(dev, desc->addr);
> > +				len_to_cpy = RTE_MIN(data_len - offset,
> desc->len);
> > +			} else {
> > +				/* Room in vring buffer is not enough */
> > +				uncompleted_pkt = 1;
> > +				break;
> > +			}
> > +		};
> > +
> >  		/* Update used ring with desc information */
> >  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> >
> 	head[packet_success];
> > -		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> packet_len;
> >
> > -		/* Copy mbuf data to buffer */
> > -		/* FIXME for sg mbuf and the case that desc couldn't hold the
> mbuf data */
> > -		rte_memcpy((void *)(uintptr_t)buff_addr,
> > -			rte_pktmbuf_mtod(buff, const void *),
> > -			rte_pktmbuf_data_len(buff));
> > -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > -			rte_pktmbuf_data_len(buff), 0);
> > +		/* Drop the packet if it is uncompleted */
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
> Here things become complicated with the previous lockless reserve.
> What is the consequence when guest sees zero in used->len? At least, do we
> check with virtio-net implementation?
> 
> > +		else
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > +							offset + vq-
> >vhost_hlen;
> Two questions here,
> 1.  add virtio header len?
I will double-check whether it is necessary to add the virtio header length here.
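For what it's worth, assuming the spec's definition of used->len as the total number of bytes written into the buffer, the header would be counted, roughly (sketch; copied_bytes is an illustrative name):

	/* The virtio header is written into the same descriptor chain, so it
	 * counts toward used->len together with the copied packet data. */
	vq->used->ring[res_cur_idx & (vq->size - 1)].len =
					copied_bytes + vq->vhost_hlen;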

> 2.  Why not use packet_len rather than offset?
For me, they are exactly the same value if the whole packet is fully copied.
Do you have any concern about using offset here?

> >
> >  		res_cur_idx++;
> >  		packet_success++;
> >
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			continue;
> > +
> >  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> >  			(const void *)&virtio_hdr, vq->vhost_hlen);
> >
> > @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev,
> uint16_t queue_id,
> >  		desc = &vq->desc[head[entry_success]];
> >
> >  		/* Discard first buffer as it is the virtio header */
> > -		desc = &vq->desc[desc->next];
> > +		if (desc->flags & VRING_DESC_F_NEXT) {
> > +			desc = &vq->desc[desc->next];
> > +			vb_offset = 0;
> > +			vb_avail = desc->len;
> > +		} else {
> > +			vb_offset = vq->vhost_hlen;
> > +			vb_avail = desc->len - vb_offset;
> > +		}
> >
> >  		/* Buffer address translation. */
> >  		vb_addr = gpa_to_vva(dev, desc->addr); @@ -608,8 +645,6
> @@
> > rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  		vq->used->ring[used_idx].id = head[entry_success];
> >  		vq->used->ring[used_idx].len = 0;
> >
> > -		vb_offset = 0;
> > -		vb_avail = desc->len;
> >  		/* Allocate an mbuf and populate the structure. */
> >  		m = rte_pktmbuf_alloc(mbuf_pool);
> >  		if (unlikely(m == NULL)) {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue
  2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
                     ` (4 preceding siblings ...)
  2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring Ouyang Changchun
@ 2015-06-01  8:25   ` Ouyang Changchun
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
                       ` (4 more replies)
  5 siblings, 5 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-01  8:25 UTC (permalink / raw)
  To: dev

Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 194 ++++++++++++++++++++++++++----------------
 1 file changed, 122 insertions(+), 72 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
@ 2015-06-01  8:25     ` Ouyang Changchun
  2015-06-02  7:51       ` Xie, Huawei
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 2/4] lib_vhost: Refine code style Ouyang Changchun
                       ` (3 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-01  8:25 UTC (permalink / raw)
  To: dev

Vring enqueue needs to consider 2 cases:
 1. Separate descriptors contain the virtio header and the actual data, e.g. the first descriptor
    is for the virtio header, followed by descriptors for the actual data.
 2. The virtio header and some data are put together in one descriptor, e.g. the first descriptor contains both
    the virtio header and part of the actual data, followed by more descriptors for the rest of the packet data;
    the current DPDK-based virtio-net pmd implementation is this case.

So does vring dequeue: it should not assume a vring descriptor is chained or not chained, it should use
desc->flags to check whether it is chained or not. This patch also fixes a TX corruption issue in fedora 21,
which uses one single vring descriptor (header and data in one descriptor) for the virtio tx process by default.

Changes in v3
  - support scattered mbuf: check whether the mbuf has a 'next' pointer and copy all segments to the vring buffer.

Changes in v2
  - drop the uncompleted packet
  - refine code logic

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 88 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 4809d32..5fe1b6c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,7 +46,8 @@
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
  */
 static inline uint32_t __attribute__((always_inline))
 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
@@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0, vb_offset = 0;
+		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,7 +130,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
@@ -136,28 +140,73 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			vb_offset += vq->vhost_hlen;
+			hdr = 1;
 		}
 
+		pkt_len = rte_pktmbuf_pkt_len(buff);
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (len_to_cpy > 0) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
+				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+			vb_offset += len_to_cpy;
+			total_copied += len_to_cpy;
+
+			/* The whole packet completes */
+			if (total_copied == pkt_len)
+				break;
+
+			/* The current segment completes */
+			if (offset == data_len) {
+				buff = buff->next;
+				if (buff != NULL) {
+					offset = 0;
+					data_len = rte_pktmbuf_data_len(buff);
+				}
+			}
+
+			/* The current vring descriptor done */
+			if (vb_offset == desc->len) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					vb_offset = 0;
+				} else {
+					/* Room in vring buffer is not enough */
+					uncompleted_pkt = 1;
+					break;
+				}
+			}
+			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
+		};
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							vq->vhost_hlen;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							pkt_len + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -589,7 +638,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -608,8 +664,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 2/4] lib_vhost: Refine code style
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-06-01  8:25     ` Ouyang Changchun
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 3/4] lib_vhost: Extract function Ouyang Changchun
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-01  8:25 UTC (permalink / raw)
  To: dev

Remove unnecessary new line.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5fe1b6c..de60e9b 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -267,8 +267,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * (guest physical addr -> vhost virtual addr)
 	 */
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -286,8 +285,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
@@ -310,8 +308,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 3/4] lib_vhost: Extract function
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 2/4] lib_vhost: Refine code style Ouyang Changchun
@ 2015-06-01  8:25     ` Ouyang Changchun
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-01  8:25 UTC (permalink / raw)
  To: dev

Extract code into 2 common functions:
update_secure_len, which is used to accumulate the buffer length in the vring descriptors,
and fill_buf_vec, which is used to fill struct buf_vec.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 79 +++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index de60e9b..647a0c1 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -438,6 +438,49 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t idx,
+	uint32_t *secure_len)
+{
+	uint8_t next_desc = 0;
+	uint32_t len = *secure_len;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+}
+
+static inline void __attribute__((always_inline))
+fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*vec_idx = vec_id;
+}
+
 /*
  * This function works for mergeable RX.
  */
@@ -468,7 +511,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		uint16_t need_cnt;
 		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
+		uint16_t i;
 
 		do {
 			/*
@@ -492,18 +535,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 						(res_cur_idx) & (vq->size - 1);
 					uint32_t idx =
 						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
 
+					update_secure_len(vq, idx, &secure_len);
 					res_cur_idx++;
 				}
 			} while (pkt_len > secure_len);
@@ -514,28 +547,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
 		need_cnt = res_cur_idx - res_base_idx;
 
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
+		for (i = 0; i < need_cnt; i++)
+			fill_buf_vec(vq, res_base_idx + i, &vec_idx);
 
 		res_end_idx = res_cur_idx;
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 4/4] lib_vhost: Remove unnecessary vring descriptor length updating
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                       ` (2 preceding siblings ...)
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 3/4] lib_vhost: Extract function Ouyang Changchun
@ 2015-06-01  8:25     ` Ouyang Changchun
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-01  8:25 UTC (permalink / raw)
  To: dev

Remove these unnecessary vring descriptor length updates; vhost should not change them.
The virtio front end should assign the value of desc.len for both rx and tx.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 647a0c1..9a81095 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -136,7 +136,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		 * placed in separate buffers.
 		 */
 		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
@@ -292,7 +291,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	if (vb_avail == 0) {
 		uint32_t desc_idx =
 			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
 
 		if ((vq->desc[desc_idx].flags
 			& VRING_DESC_F_NEXT) == 0) {
@@ -376,7 +374,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 					 */
 					uint32_t desc_idx =
 						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
 
 					if ((vq->desc[desc_idx].flags &
 						VRING_DESC_F_NEXT) == 0) {
@@ -411,26 +408,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id
 					= vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len
 					= entry_len;
-				entry_len = 0;
-				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-06-02  7:51       ` Xie, Huawei
  2015-06-02  8:10         ` Ouyang, Changchun
  0 siblings, 1 reply; 52+ messages in thread
From: Xie, Huawei @ 2015-06-02  7:51 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

On 6/1/2015 4:26 PM, Ouyang, Changchun wrote:
> Vring enqueue need consider the 2 cases:
>  1. use separate descriptors to contain virtio header and actual data, e.g. the first descriptor
>     is for virtio header, and then followed by descriptors for actual data.
>  2. virtio header and some data are put together in one descriptor, e.g. the first descriptor contain both
>     virtio header and part of actual data, and then followed by more descriptors for rest of packet data,
>     current DPDK based virtio-net pmd implementation is this case;
>
> So does vring dequeue, it should not assume vring descriptor is chained or not chained, it should use
> desc->flags to check whether it is chained or not. this patch also fixes TX corrupt issue in fedora 21
> which uses one single vring descriptor(header and data are in one descriptor) for virtio tx process on default.
Suggest removing fedora 21 from the commit message; the bug is related to the
virtio-net driver rather than to the distribution.
> Changes in v3
>   - support scattered mbuf, check the mbuf has 'next' pointer or not and copy all segments to vring buffer.
>
> Changes in v2
>   - drop the uncompleted packet
>   - refine code logic
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 88 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 71 insertions(+), 17 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 4809d32..5fe1b6c 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -46,7 +46,8 @@
>   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>   * be received from the physical port or from another virtio device. A packet
>   * count is returned to indicate the number of packets that are succesfully
> - * added to the RX queue. This function works when mergeable is disabled.
> + * added to the RX queue. This function works when the mbuf is scattered, but
> + * it doesn't support the mergeable feature.
>   */
>  static inline uint32_t __attribute__((always_inline))
>  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> @@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0, vb_offset = 0;
> +		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
> +		uint8_t hdr = 0, uncompleted_pkt = 0;
> +
>  		/* Get descriptor from available ring */
>  		desc = &vq->desc[head[packet_success]];
>  
> @@ -125,7 +130,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  		/* Copy virtio_hdr to packet and increment buffer address */
>  		buff_hdr_addr = buff_addr;
> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>  
>  		/*
>  		 * If the descriptors are chained the header and data are
> @@ -136,28 +140,73 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  			desc = &vq->desc[desc->next];
>  			/* Buffer address translation. */
>  			buff_addr = gpa_to_vva(dev, desc->addr);
> -			desc->len = rte_pktmbuf_data_len(buff);
>  		} else {
> -			buff_addr += vq->vhost_hlen;
> -			desc->len = packet_len;
> +			vb_offset += vq->vhost_hlen;
> +			hdr = 1;
>  		}
>  
> +		pkt_len = rte_pktmbuf_pkt_len(buff);
> +		data_len = rte_pktmbuf_data_len(buff);
> +		len_to_cpy = RTE_MIN(data_len,
> +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> +		while (len_to_cpy > 0) {
while (total_copied < pkt_len) is safer and more readable.
Besides, what if we encounter a descriptor with zero length? With
len_to_cpy > 0, we would pass a partially copied mbuf to the guest but still
set used->len = packet_len.
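
To make the concern concrete, a hypothetical standalone sketch (made-up desc_len[] values standing in for a descriptor chain, not the patch's code): with the loop keyed on len_to_cpy > 0, a zero-length descriptor in the middle of the chain stops the copy early, while the used ring would still report the full packet length.

    /* Hypothetical illustration of the early-exit hazard. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t desc_len[] = { 64, 0, 2048 }; /* zero-length descriptor in the chain */
        uint32_t pkt_len = 128;
        uint32_t total_copied = 0;
        uint32_t d = 0;
        uint32_t len_to_cpy = desc_len[d] < pkt_len ? desc_len[d] : pkt_len;

        while (len_to_cpy > 0) {          /* exits as soon as len_to_cpy becomes 0 */
            total_copied += len_to_cpy;
            if (total_copied == pkt_len)
                break;
            d++;
            len_to_cpy = desc_len[d];
            if (len_to_cpy > pkt_len - total_copied)
                len_to_cpy = pkt_len - total_copied;
        }

        /* Prints "copied 64 of 128 bytes": the mbuf is only partially copied. */
        printf("copied %u of %u bytes\n", total_copied, pkt_len);
        return 0;
    }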
> +			/* Copy mbuf data to buffer */
> +			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
> +				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> +				len_to_cpy);
> +			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
> +				len_to_cpy, 0);
> +
> +			offset += len_to_cpy;
> +			vb_offset += len_to_cpy;
> +			total_copied += len_to_cpy;
> +
> +			/* The whole packet completes */
> +			if (total_copied == pkt_len)
> +				break;
> +
> +			/* The current segment completes */
> +			if (offset == data_len) {
> +				buff = buff->next;
> +				if (buff != NULL) {
> +					offset = 0;
> +					data_len = rte_pktmbuf_data_len(buff);
> +				}
What if (buff == NULL)? Either we treat the mbuf as reliable and don't do any
sanity check, or we check thoroughly:
if (buff != NULL) {

} else {
    ...
    break;
}
> +			}
> +
> +			/* The current vring descriptor done */
> +			if (vb_offset == desc->len) {
> +				if (desc->flags & VRING_DESC_F_NEXT) {
> +					desc = &vq->desc[desc->next];
> +					buff_addr = gpa_to_vva(dev, desc->addr);
> +					vb_offset = 0;
> +				} else {
> +					/* Room in vring buffer is not enough */
> +					uncompleted_pkt = 1;
> +					break;
> +				}
> +			}
> +			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
> +		};
> +
>  		/* Update used ring with desc information */
>  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>  							head[packet_success];
> -		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>  
> -		/* Copy mbuf data to buffer */
> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> -		rte_memcpy((void *)(uintptr_t)buff_addr,
> -			rte_pktmbuf_mtod(buff, const void *),
> -			rte_pktmbuf_data_len(buff));
> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -			rte_pktmbuf_data_len(buff), 0);
> +		/* Drop the packet if it is uncompleted */
> +		if (unlikely(uncompleted_pkt == 1))
> +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +							vq->vhost_hlen;
> +		else
> +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +							pkt_len + vq->vhost_hlen;
>  
>  		res_cur_idx++;
>  		packet_success++;
>  
> +		if (unlikely(uncompleted_pkt == 1))
> +			continue;
> +
>  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>  			(const void *)&virtio_hdr, vq->vhost_hlen);
>  
> @@ -589,7 +638,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		desc = &vq->desc[head[entry_success]];
>  
>  		/* Discard first buffer as it is the virtio header */
> -		desc = &vq->desc[desc->next];
> +		if (desc->flags & VRING_DESC_F_NEXT) {
> +			desc = &vq->desc[desc->next];
> +			vb_offset = 0;
> +			vb_avail = desc->len;
> +		} else {
> +			vb_offset = vq->vhost_hlen;
> +			vb_avail = desc->len - vb_offset;
> +		}
>  
>  		/* Buffer address translation. */
>  		vb_addr = gpa_to_vva(dev, desc->addr);
> @@ -608,8 +664,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		vq->used->ring[used_idx].id = head[entry_success];
>  		vq->used->ring[used_idx].len = 0;
>  
> -		vb_offset = 0;
> -		vb_avail = desc->len;
>  		/* Allocate an mbuf and populate the structure. */
>  		m = rte_pktmbuf_alloc(mbuf_pool);
>  		if (unlikely(m == NULL)) {


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-02  7:51       ` Xie, Huawei
@ 2015-06-02  8:10         ` Ouyang, Changchun
  0 siblings, 0 replies; 52+ messages in thread
From: Ouyang, Changchun @ 2015-06-02  8:10 UTC (permalink / raw)
  To: Xie, Huawei, dev



> -----Original Message-----
> From: Xie, Huawei
> Sent: Tuesday, June 2, 2015 3:51 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: Re: [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle
> chained vring descriptors
> 
> On 6/1/2015 4:26 PM, Ouyang, Changchun wrote:
> > Vring enqueue need consider the 2 cases:
> >  1. use separate descriptors to contain virtio header and actual data, e.g.
> the first descriptor
> >     is for virtio header, and then followed by descriptors for actual data.
> >  2. virtio header and some data are put together in one descriptor, e.g. the
> first descriptor contain both
> >     virtio header and part of actual data, and then followed by more
> descriptors for rest of packet data,
> >     current DPDK based virtio-net pmd implementation is this case;
> >
> > So does vring dequeue, it should not assume vring descriptor is
> > chained or not chained, it should use
> > desc->flags to check whether it is chained or not. this patch also
> > desc->fixes TX corrupt issue in fedora 21
> > which uses one single vring descriptor(header and data are in one
> descriptor) for virtio tx process on default.
> Suggest remove fedora 21 in the commit message, at least the bug is related
> to virtio-net driver rather than distribution.

I was just trying to give more information: this patch fixes the TX corruption issue on fedora 21.
I don't think this information contradicts what you mean.

> > Changes in v3
> >   - support scattered mbuf, check the mbuf has 'next' pointer or not and
> copy all segments to vring buffer.
> >
> > Changes in v2
> >   - drop the uncompleted packet
> >   - refine code logic
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 88
> > ++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 71 insertions(+), 17 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 4809d32..5fe1b6c 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -46,7 +46,8 @@
> >   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
> >   * be received from the physical port or from another virtio device. A
> packet
> >   * count is returned to indicate the number of packets that are
> > succesfully
> > - * added to the RX queue. This function works when mergeable is disabled.
> > + * added to the RX queue. This function works when the mbuf is
> > + scattered, but
> > + * it doesn't support the mergeable feature.
> >   */
> >  static inline uint32_t __attribute__((always_inline))
> > virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, @@ -59,7
> > +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0, vb_offset = 0;
> > +		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
> > +		uint8_t hdr = 0, uncompleted_pkt = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +130,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> > queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are @@
> > -136,28 +140,73 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > -			desc->len = rte_pktmbuf_data_len(buff);
> >  		} else {
> > -			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			vb_offset += vq->vhost_hlen;
> > +			hdr = 1;
> >  		}
> >
> > +		pkt_len = rte_pktmbuf_pkt_len(buff);
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len,
> > +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> > +		while (len_to_cpy > 0) {
> while (total_copied < pkt_len) is secure and readable.
> Besides,  what if we encounter some descriptor with zero length? With
> len_to_cpy > 0, we would pass a partially copied mbuf to guest but still
> used->len = packet_len.

Good catch, will update it in v4.

> > +			/* Copy mbuf data to buffer */
> > +			rte_memcpy((void *)(uintptr_t)(buff_addr +
> vb_offset),
> > +				(const void *)(rte_pktmbuf_mtod(buff,
> const char *) + offset),
> > +				len_to_cpy);
> > +			PRINT_PACKET(dev, (uintptr_t)(buff_addr +
> vb_offset),
> > +				len_to_cpy, 0);
> > +
> > +			offset += len_to_cpy;
> > +			vb_offset += len_to_cpy;
> > +			total_copied += len_to_cpy;
> > +
> > +			/* The whole packet completes */
> > +			if (total_copied == pkt_len)
> > +				break;
> > +
> > +			/* The current segment completes */
> > +			if (offset == data_len) {
> > +				buff = buff->next;
> > +				if (buff != NULL) {
> > +					offset = 0;
> > +					data_len =
> rte_pktmbuf_data_len(buff);
> > +				}
> What if (buf == NULL)? Either we treat mbuf reliable, and don't do any sanity
> check, or we check thoroughly.
> if (buff != NULL) {

The check could be removed, as the other check "total_copied == pkt_len" guarantees that when the current
segment completes but the whole packet doesn't, the next segment must not be NULL.
Will update it in v4.
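
A minimal standalone sketch of that invariant (a hypothetical segment list in place of the mbuf chain, not the patch itself): since pkt_len is the sum of the per-segment lengths, a segment can only end before the packet is complete if another segment follows.

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct seg {
        uint32_t data_len;
        struct seg *next;
    };

    /* Assumes pkt_len == sum of data_len over the whole segment list. */
    static uint32_t copy_all(const struct seg *s, uint32_t pkt_len)
    {
        uint32_t total_copied = 0, offset = 0;

        while (total_copied < pkt_len) {
            uint32_t len_to_cpy = s->data_len - offset;

            total_copied += len_to_cpy;
            offset += len_to_cpy;

            if (total_copied == pkt_len)
                break;                    /* done: s->next is never touched */

            if (offset == s->data_len) {
                assert(s->next != NULL);  /* guaranteed by the sum invariant */
                s = s->next;
                offset = 0;
            }
        }
        return total_copied;
    }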

> 
> } else {
>     ...
>     break;
> }
> > +			}
> > +
> > +			/* The current vring descriptor done */
> > +			if (vb_offset == desc->len) {
> > +				if (desc->flags & VRING_DESC_F_NEXT) {
> > +					desc = &vq->desc[desc->next];
> > +					buff_addr = gpa_to_vva(dev, desc-
> >addr);
> > +					vb_offset = 0;
> > +				} else {
> > +					/* Room in vring buffer is not enough
> */
> > +					uncompleted_pkt = 1;
> > +					break;
> > +				}
> > +			}
> > +			len_to_cpy = RTE_MIN(data_len - offset, desc->len -
> vb_offset);
> > +		};
> > +
> >  		/* Update used ring with desc information */
> >  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> >
> 	head[packet_success];
> > -		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> packet_len;
> >
> > -		/* Copy mbuf data to buffer */
> > -		/* FIXME for sg mbuf and the case that desc couldn't hold the
> mbuf data */
> > -		rte_memcpy((void *)(uintptr_t)buff_addr,
> > -			rte_pktmbuf_mtod(buff, const void *),
> > -			rte_pktmbuf_data_len(buff));
> > -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > -			rte_pktmbuf_data_len(buff), 0);
> > +		/* Drop the packet if it is uncompleted */
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > +							vq->vhost_hlen;
> > +		else
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > +							pkt_len + vq-
> >vhost_hlen;
> >
> >  		res_cur_idx++;
> >  		packet_success++;
> >
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			continue;
> > +
> >  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> >  			(const void *)&virtio_hdr, vq->vhost_hlen);
> >
> > @@ -589,7 +638,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev,
> uint16_t queue_id,
> >  		desc = &vq->desc[head[entry_success]];
> >
> >  		/* Discard first buffer as it is the virtio header */
> > -		desc = &vq->desc[desc->next];
> > +		if (desc->flags & VRING_DESC_F_NEXT) {
> > +			desc = &vq->desc[desc->next];
> > +			vb_offset = 0;
> > +			vb_avail = desc->len;
> > +		} else {
> > +			vb_offset = vq->vhost_hlen;
> > +			vb_avail = desc->len - vb_offset;
> > +		}
> >
> >  		/* Buffer address translation. */
> >  		vb_addr = gpa_to_vva(dev, desc->addr); @@ -608,8 +664,6
> @@
> > rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  		vq->used->ring[used_idx].id = head[entry_success];
> >  		vq->used->ring[used_idx].len = 0;
> >
> > -		vb_offset = 0;
> > -		vb_avail = desc->len;
> >  		/* Allocate an mbuf and populate the structure. */
> >  		m = rte_pktmbuf_alloc(mbuf_pool);
> >  		if (unlikely(m == NULL)) {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue
  2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                       ` (3 preceding siblings ...)
  2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
@ 2015-06-02  8:51     ` Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
                         ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-02  8:51 UTC (permalink / raw)
  To: dev

Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support for copying scattered mbufs to the vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 192 ++++++++++++++++++++++++++----------------
 1 file changed, 120 insertions(+), 72 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
@ 2015-06-02  8:51       ` Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 2/4] lib_vhost: Refine code style Ouyang Changchun
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-02  8:51 UTC (permalink / raw)
  To: dev

Vring enqueue needs to consider two cases:
 1. Separate descriptors contain the virtio header and the actual data, e.g. the first descriptor
    holds the virtio header and is followed by descriptors for the actual data.
 2. The virtio header and some data are put together in one descriptor, e.g. the first descriptor contains both
    the virtio header and part of the actual data, followed by more descriptors for the rest of the packet data;
    the current DPDK-based virtio-net PMD implementation is this case.

The same applies to vring dequeue: it should not assume the vring descriptor is chained or not; it should use
desc->flags to check whether it is chained. This patch also fixes a TX corruption issue when vhost
works with a virtio-net driver that uses one single vring descriptor (header and data in one descriptor)
for the virtio tx process by default.
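
For the dequeue side described above, a minimal standalone sketch of the layout check (simplified stand-in types, not the librte_vhost definitions): if the first descriptor is chained it is assumed to carry only the virtio header, otherwise header and data share it and the copy starts vhost_hlen bytes in.

    #include <stdint.h>

    #define VRING_DESC_F_NEXT 1

    struct vring_desc {
        uint64_t addr;
        uint32_t len;
        uint16_t flags;
        uint16_t next;
    };

    /* Locate the first chunk of packet data behind the virtio header. */
    static void first_data_chunk(const struct vring_desc *desc, uint16_t head,
                                 uint32_t vhost_hlen, uint16_t *data_idx,
                                 uint32_t *offset, uint32_t *avail)
    {
        if (desc[head].flags & VRING_DESC_F_NEXT) {
            /* Case 1: the header has its own descriptor; data starts in the next one. */
            *data_idx = desc[head].next;
            *offset = 0;
            *avail = desc[*data_idx].len;
        } else {
            /* Case 2: header and data share one descriptor; skip the header. */
            *data_idx = head;
            *offset = vhost_hlen;
            *avail = desc[head].len - vhost_hlen;
        }
    }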

Changes in v4
  - remove unnecessary check for mbuf 'next' pointer
  - refine packet copying completeness check

Changes in v3
  - support scattered mbuf, check the mbuf has 'next' pointer or not and copy all segments to vring buffer.

Changes in v2
  - drop the uncompleted packet
  - refine code logic

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 86 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 69 insertions(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 4809d32..dc555a8 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,7 +46,8 @@
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
  */
 static inline uint32_t __attribute__((always_inline))
 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
@@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0, vb_offset = 0;
+		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,7 +130,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
@@ -136,28 +140,71 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			vb_offset += vq->vhost_hlen;
+			hdr = 1;
 		}
 
+		pkt_len = rte_pktmbuf_pkt_len(buff);
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (total_copied < pkt_len) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
+				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+			vb_offset += len_to_cpy;
+			total_copied += len_to_cpy;
+
+			/* The whole packet completes */
+			if (total_copied == pkt_len)
+				break;
+
+			/* The current segment completes */
+			if (offset == data_len) {
+				buff = buff->next;
+				offset = 0;
+				data_len = rte_pktmbuf_data_len(buff);
+			}
+
+			/* The current vring descriptor done */
+			if (vb_offset == desc->len) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					vb_offset = 0;
+				} else {
+					/* Room in vring buffer is not enough */
+					uncompleted_pkt = 1;
+					break;
+				}
+			}
+			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
+		};
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							vq->vhost_hlen;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							pkt_len + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -589,7 +636,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -608,8 +662,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 2/4] lib_vhost: Refine code style
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-06-02  8:51       ` Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 3/4] lib_vhost: Extract function Ouyang Changchun
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-02  8:51 UTC (permalink / raw)
  To: dev

Remove unnecessary new line.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index dc555a8..0286a92 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -265,8 +265,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * (guest physical addr -> vhost virtual addr)
 	 */
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -284,8 +283,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
@@ -308,8 +306,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 3/4] lib_vhost: Extract function
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 2/4] lib_vhost: Refine code style Ouyang Changchun
@ 2015-06-02  8:51       ` Ouyang Changchun
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-02  8:51 UTC (permalink / raw)
  To: dev

Extract code into 2 common functions:
update_secure_len, which is used to accumulate the buffer length in the vring descriptors,
and fill_buf_vec, which is used to fill struct buf_vec.
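
A rough standalone sketch of how the two helpers fit into the mergeable-RX path (hypothetical, simplified model where chain_len[i] already holds the total space behind avail slot i; the real code walks vq->desc and fills vq->buf_vec): avail entries are reserved one by one until the accumulated space covers the packet, and each reserved chain is then recorded into buf_vec.

    #include <stdint.h>

    /*
     * Hypothetical model: reserve avail slots starting at res_base_idx until the
     * accumulated space (what update_secure_len computes per slot) covers pkt_len.
     * fill_buf_vec would then record each reserved chain into vq->buf_vec.
     */
    static uint16_t reserve_for_packet(const uint32_t *chain_len,
                                       uint16_t res_base_idx, uint32_t pkt_len)
    {
        uint16_t res_cur_idx = res_base_idx;
        uint32_t secure_len = 0;

        while (secure_len < pkt_len) {
            secure_len += chain_len[res_cur_idx];  /* update_secure_len's role */
            res_cur_idx++;
        }

        /* res_cur_idx - res_base_idx chains would now be passed,
         * one by one, to fill_buf_vec in the real code. */
        return res_cur_idx;
    }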

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 79 +++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 0286a92..e08609c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -436,6 +436,49 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t idx,
+	uint32_t *secure_len)
+{
+	uint8_t next_desc = 0;
+	uint32_t len = *secure_len;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+}
+
+static inline void __attribute__((always_inline))
+fill_buf_vec(struct vhost_virtqueue *vq, uint16_t id, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*vec_idx = vec_id;
+}
+
 /*
  * This function works for mergeable RX.
  */
@@ -466,7 +509,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		uint16_t need_cnt;
 		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
+		uint16_t i;
 
 		do {
 			/*
@@ -490,18 +533,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 						(res_cur_idx) & (vq->size - 1);
 					uint32_t idx =
 						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
 
+					update_secure_len(vq, idx, &secure_len);
 					res_cur_idx++;
 				}
 			} while (pkt_len > secure_len);
@@ -512,28 +545,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
 		need_cnt = res_cur_idx - res_base_idx;
 
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
+		for (i = 0; i < need_cnt; i++)
+			fill_buf_vec(vq, res_base_idx + i, &vec_idx);
 
 		res_end_idx = res_cur_idx;
 
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 4/4] lib_vhost: Remove unnecessary vring descriptor length updating
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                         ` (2 preceding siblings ...)
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 3/4] lib_vhost: Extract function Ouyang Changchun
@ 2015-06-02  8:51       ` Ouyang Changchun
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-02  8:51 UTC (permalink / raw)
  To: dev

Remove the unnecessary vring descriptor length updates; vhost should not change them.
The virtio front end should assign a value to desc.len for both rx and tx.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index e08609c..6ec3d5f 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -136,7 +136,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		 * placed in separate buffers.
 		 */
 		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
@@ -290,7 +289,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	if (vb_avail == 0) {
 		uint32_t desc_idx =
 			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
 
 		if ((vq->desc[desc_idx].flags
 			& VRING_DESC_F_NEXT) == 0) {
@@ -374,7 +372,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 					 */
 					uint32_t desc_idx =
 						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
 
 					if ((vq->desc[desc_idx].flags &
 						VRING_DESC_F_NEXT) == 0) {
@@ -409,26 +406,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id
 					= vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len
 					= entry_len;
-				entry_len = 0;
-				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue
  2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                         ` (3 preceding siblings ...)
  2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
@ 2015-06-03  6:02       ` Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
                           ` (5 more replies)
  4 siblings, 6 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-03  6:02 UTC (permalink / raw)
  To: dev

Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support for copying scattered mbufs to the vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
@ 2015-06-03  6:02         ` Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 2/4] lib_vhost: Refine code style Ouyang Changchun
                           ` (4 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-03  6:02 UTC (permalink / raw)
  To: dev

Vring enqueue needs to consider two cases:
 1. Separate descriptors contain the virtio header and the actual data, e.g. the first descriptor
    holds the virtio header and is followed by descriptors for the actual data.
 2. The virtio header and some data are put together in one descriptor, e.g. the first descriptor contains both
    the virtio header and part of the actual data, followed by more descriptors for the rest of the packet data;
    the current DPDK-based virtio-net PMD implementation is this case.

The same applies to vring dequeue: it should not assume the vring descriptor is chained or not; it should use
desc->flags to check whether it is chained. This patch also fixes a TX corruption issue when vhost
works with a virtio-net driver that uses one single vring descriptor (header and data in one descriptor)
for the virtio tx process by default.

Changes in v5
  - support virtio header with partial data in first descriptor and then followed by descriptor for rest data

Changes in v4
  - remove unnecessary check for mbuf 'next' pointer
  - refine packet copying completeness check

Changes in v3
  - support scattered mbuf, check the mbuf has 'next' pointer or not and copy all segments to vring buffer.

Changes in v2
  - drop the uncompleted packet
  - refine code logic

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 89 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 18 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 4809d32..a11d007 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,7 +46,8 @@
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
  */
 static inline uint32_t __attribute__((always_inline))
 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
@@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0, vb_offset = 0;
+		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,39 +130,82 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
 		 * placed in separate buffers.
 		 */
-		if (desc->flags & VRING_DESC_F_NEXT) {
+		if ((desc->flags & VRING_DESC_F_NEXT) &&
+			(desc->len == vq->vhost_hlen)) {
 			desc->len = vq->vhost_hlen;
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			vb_offset += vq->vhost_hlen;
+			hdr = 1;
 		}
 
+		pkt_len = rte_pktmbuf_pkt_len(buff);
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (total_copied < pkt_len) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
+				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+			vb_offset += len_to_cpy;
+			total_copied += len_to_cpy;
+
+			/* The whole packet completes */
+			if (total_copied == pkt_len)
+				break;
+
+			/* The current segment completes */
+			if (offset == data_len) {
+				buff = buff->next;
+				offset = 0;
+				data_len = rte_pktmbuf_data_len(buff);
+			}
+
+			/* The current vring descriptor done */
+			if (vb_offset == desc->len) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					vb_offset = 0;
+				} else {
+					/* Room in vring buffer is not enough */
+					uncompleted_pkt = 1;
+					break;
+				}
+			}
+			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
+		};
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							vq->vhost_hlen;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							pkt_len + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -589,7 +637,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -608,8 +663,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 2/4] lib_vhost: Refine code style
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-06-03  6:02         ` Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 3/4] lib_vhost: Extract function Ouyang Changchun
                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-03  6:02 UTC (permalink / raw)
  To: dev

Remove unnecessary new line.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index a11d007..5824ffc 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -266,8 +266,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * (guest physical addr -> vhost virtual addr)
 	 */
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -285,8 +284,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
@@ -309,8 +307,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 3/4] lib_vhost: Extract function
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 2/4] lib_vhost: Refine code style Ouyang Changchun
@ 2015-06-03  6:02         ` Ouyang Changchun
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
                           ` (2 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-03  6:02 UTC (permalink / raw)
  To: dev

Extract code into 2 common functions:
update_secure_len, which is used to accumulate the buffer length in the vring descriptors,
and fill_buf_vec, which is used to fill struct buf_vec.

Changes in v5
  - merge fill_buf_vec into update_secure_len
  - do both tasks in one-time loop

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 85 ++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 49 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5824ffc..5c31a88 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -437,6 +437,34 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t id,
+	uint32_t *secure_len, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t len = *secure_len;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+	*vec_idx = vec_id;
+}
+
 /*
  * This function works for mergeable RX.
  */
@@ -446,8 +474,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, entry_success = 0;
-	uint16_t avail_idx, res_cur_idx;
-	uint16_t res_base_idx, res_end_idx;
+	uint16_t avail_idx;
+	uint16_t res_base_idx, res_cur_idx;
 	uint8_t success = 0;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
@@ -463,17 +491,16 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		return 0;
 
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t secure_len = 0;
-		uint16_t need_cnt;
-		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
 
 		do {
 			/*
 			 * As many data cores may want access to available
 			 * buffers, they need to be reserved.
 			 */
+			uint32_t secure_len = 0;
+			uint32_t vec_idx = 0;
+
 			res_base_idx = vq->last_used_idx_res;
 			res_cur_idx = res_base_idx;
 
@@ -487,22 +514,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 						dev->device_fh);
 					return pkt_idx;
 				} else {
-					uint16_t wrapped_idx =
-						(res_cur_idx) & (vq->size - 1);
-					uint32_t idx =
-						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
-
+					update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx);
 					res_cur_idx++;
 				}
 			} while (pkt_len > secure_len);
@@ -513,33 +525,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
-		need_cnt = res_cur_idx - res_base_idx;
-
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
-
-		res_end_idx = res_cur_idx;
-
 		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-			res_end_idx, pkts[pkt_idx]);
+			res_cur_idx, pkts[pkt_idx]);
 
 		rte_compiler_barrier();
 
@@ -551,7 +538,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			rte_pause();
 
 		*(volatile uint16_t *)&vq->used->idx += entry_success;
-		vq->last_used_idx = res_end_idx;
+		vq->last_used_idx = res_cur_idx;
 
 		/* flush used->idx update before we read avail->flags. */
 		rte_mb();
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 4/4] lib_vhost: Remove unnecessary vring descriptor length updating
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                           ` (2 preceding siblings ...)
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 3/4] lib_vhost: Extract function Ouyang Changchun
@ 2015-06-03  6:02         ` Ouyang Changchun
  2015-06-03  7:50         ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Xu, Qian Q
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  5 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-03  6:02 UTC (permalink / raw)
  To: dev

Remove the unnecessary vring descriptor length updates; vhost should not change them.
The virtio front end should assign a value to desc.len for both rx and tx.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5c31a88..07bc16c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -137,7 +137,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		 */
 		if ((desc->flags & VRING_DESC_F_NEXT) &&
 			(desc->len == vq->vhost_hlen)) {
-			desc->len = vq->vhost_hlen;
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
@@ -291,7 +290,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	if (vb_avail == 0) {
 		uint32_t desc_idx =
 			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
 
 		if ((vq->desc[desc_idx].flags
 			& VRING_DESC_F_NEXT) == 0) {
@@ -375,7 +373,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 					 */
 					uint32_t desc_idx =
 						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
 
 					if ((vq->desc[desc_idx].flags &
 						VRING_DESC_F_NEXT) == 0) {
@@ -410,26 +407,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id
 					= vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len
 					= entry_len;
-				entry_len = 0;
-				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                           ` (3 preceding siblings ...)
  2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
@ 2015-06-03  7:50         ` Xu, Qian Q
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  5 siblings, 0 replies; 52+ messages in thread
From: Xu, Qian Q @ 2015-06-03  7:50 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

Tested-by: Qian Xu<qian.q.xu@intel.com>
Signed-off-by: Qian Xu<qian.q.xu@intel.com>

-Tested commit: 1a1109404e702d3ad1ccc1033df55c59bec1f89a
-Host OS/Kernel: FC21/3.19
-Guest OS/Kernel: FC21/3.19
-NIC: Intel 82599 10G
-Default x86_64-native-linuxapp-gcc configuration
-Total 2 cases, 2 passed.

Test Case 1:  test_perf_vhost_one_vm_dpdk_fwd_vhost-user
====================================================
On host:

1. Start up vhost-switch; vm2vm 0 means only one VM, without VM-to-VM communication::

    taskset -c 18-20 <dpdk_folder>/examples/vhost/build/vhost-switch -c 0xf -n 4 --huge-dir /mnt/huge --socket-mem 1024,1024 -- -p 1 --mergeable 0 --zero-copy 0 --vm2vm 0 
   

2. Start VM with vhost user as backend::

taskset -c 22-28 \
/home/qxu10/qemu-2.2.0/x86_64-softmmu/qemu-system-x86_64 -name us-vhost-vm1 -cpu host \
-enable-kvm -m 4096 -object memory-backend-file,id=mem,size=4096M,mem-path=/mnt/huge,share=on -numa node,memdev=mem -mem-prealloc \
-smp cores=20,sockets=1 -drive file=/home/img/fc21-vm1.img \
-chardev socket,id=char0,path=/home/qxu10/dpdk/vhost-net -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce=on \
-device virtio-net-pci,mac=52:54:00:00:00:01,netdev=mynet1 \
-chardev socket,id=char1,path=/home/qxu10/dpdk/vhost-net -netdev type=vhost-user,id=mynet2,chardev=char1,vhostforce=on \
-device virtio-net-pci,mac=52:54:00:00:00:02,netdev=mynet2 \
-netdev tap,id=ipvm1,ifname=tap3,script=/etc/qemu-ifup -device rtl8139,netdev=ipvm1,id=net0,mac=00:00:00:00:00:09 -nographic

On guest:

3. Ensure the dpdk folder is copied to the guest, with the same config file and build process as on the host. Then bind the 2 virtio devices to igb_uio and start testpmd; the steps below are for reference::

    ./<dpdk_folder>/tools/dpdk_nic_bind.py --bind igb_uio 00:03.0 00:04.0

    ./<dpdk_folder>/x86_64-native-linuxapp-gcc/app/test-pmd/testpmd -c f -n 4 -- -i --txqflags 0x0f00 --rxq=2 --disable-hw-vlan-filter
    
    $ >set fwd mac
    
    $ >start tx_first

4. After typing start tx_first in testpmd, the user can see the 2 virtio devices registered in vhost-user with their MAC and VLAN IDs; the log is shown in the host's vhost-sample output.

5. Send traffic (30 seconds) to virtio1 and virtio2, with packet sizes from 64 to 1518. Check the performance in Mpps. The traffic sent to virtio1 should have virtio1's MAC as DEST MAC and virtio1's VLAN ID; the traffic sent to virtio2 should have virtio2's MAC as DEST MAC and virtio2's VLAN ID. The traffic's DEST IP and SRC IP increment continuously, e.g. from 192.168.1.1 to 192.168.1.63, so the packets can go to different queues via RSS/hash. As to the functionality criteria, the received rate should not be zero. As to the performance criteria, check it with the developer or the design doc/PRD.

6. Check in the guest testpmd stats display that the packets have been distributed to different queues.
 
7. Check the packet data integrity. 
    
Test Case 2:  test_perf_virtio_one_vm_linux_fwd_vhost-user
===================================================
On host:

Same steps as in Test Case 1.

On guest:   
  
1. Set up routing on guest::

    $ systemctl stop firewalld.service
    
    $ systemctl disable firewalld.service
    
    $ systemctl stop ip6tables.service
    
    $ systemctl disable ip6tables.service

    $ systemctl stop iptables.service
    
    $ systemctl disable iptables.service

    $ systemctl stop NetworkManager.service
    
    $ systemctl disable NetworkManager.service
 
    $ echo 1 >/proc/sys/net/ipv4/ip_forward

    $ ip addr add 192.168.1.2/24 dev eth1    # eth1 is virtio1
    
    $ ip neigh add 192.168.1.1 lladdr 00:00:00:00:0a:0a dev eth1
    
    $ ip link set dev eth1 up
    
    $ ip addr add 192.168.2.2/24 dev eth2    # eth2 is virtio2
    
    $ ip neigh add 192.168.2.1 lladdr 00:00:00:00:00:0a  dev eth2
    
    $ ip link set dev eth2 up

2. Send traffic (30 seconds) to virtio1 and virtio2. According to the script above, traffic sent to virtio1 should have SRC IP e.g. 192.168.1.1, DEST IP e.g. 192.168.2.1, DEST MAC as virtio1's MAC and VLAN ID as virtio1's VLAN. Traffic sent to virtio2 has similar settings: SRC IP e.g. 192.168.2.1, DEST IP e.g. 192.168.1.1, and VLAN ID as virtio2's VLAN. Set the packet size from 64 to 1518 as well as jumbo frames. Check the performance in Mpps. As to the functionality criteria, the received rate should not be zero. As to the performance criteria, check it with the developer or the design doc/PRD.

3. Check the data integrity of the forwarded packets; ensure no content changes.

Thanks
Qian


-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
Sent: Wednesday, June 03, 2015 2:02 PM
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue

Fix enqueue/dequeue can't handle chained vring descriptors; Remove unnecessary vring descriptor length updating; Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)

--
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v6 0/4] Fix vhost enqueue/dequeue issue
  2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                           ` (4 preceding siblings ...)
  2015-06-03  7:50         ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Xu, Qian Q
@ 2015-06-08  3:18         ` Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
                             ` (4 more replies)
  5 siblings, 5 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-08  3:18 UTC (permalink / raw)
  To: dev

Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v6 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
@ 2015-06-08  3:18           ` Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 2/4] lib_vhost: Refine code style Ouyang Changchun
                             ` (3 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-08  3:18 UTC (permalink / raw)
  To: dev; +Cc: root

Vring enqueue needs to consider 2 cases:
 1. separate descriptors contain the virtio header and the actual data, e.g. the first descriptor
    is for the virtio header, followed by descriptors for the actual data;
 2. the virtio header and some data are put together in one descriptor, e.g. the first descriptor contains both
    the virtio header and part of the actual data, followed by more descriptors for the rest of the packet data;
    the current DPDK-based virtio-net PMD implementation is this case.

The same applies to vring dequeue: it should not assume the vring descriptor is chained or not chained, it should use
desc->flags to check whether it is chained. This patch also fixes a TX corruption issue when vhost
works with a virtio-net driver which uses one single vring descriptor (header and data are in one descriptor)
for the virtio TX process by default.
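
For illustration only (not part of the patch): a minimal sketch of how an enqueue path can tell the two layouts apart. The helper name hdr_and_data_share_desc below is hypothetical; struct vring_desc, VRING_DESC_F_NEXT and vhost_hlen are used as in the patch.

    /* Hypothetical helper, only to illustrate the two cases above. */
    static int
    hdr_and_data_share_desc(const struct vring_desc *desc, uint32_t vhost_hlen)
    {
    	/* Case 1: the first descriptor holds exactly the virtio header
    	 * and chains to separate data descriptors. */
    	if ((desc->flags & VRING_DESC_F_NEXT) && (desc->len == vhost_hlen))
    		return 0;
    	/* Case 2: header and (part of the) data share one descriptor,
    	 * so copying must start vhost_hlen bytes into the buffer. */
    	return 1;
    }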

Changes in v6
  - move desc->len change to here to increase code readability

Changes in v5
  - support virtio header with partial data in first descriptor and then followed by descriptor for rest data

Changes in v4
  - remove unnecessary check for mbuf 'next' pointer
  - refine packet copying completeness check

Changes in v3
  - support scattered mbuf, check the mbuf has 'next' pointer or not and copy all segments to vring buffer.

Changes in v2
  - drop the uncompleted packet
  - refine code logic

Signed-off-by: root <root@dpdk20.sh.intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 90 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 19 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 4809d32..b887e0b 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,7 +46,8 @@
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
  */
 static inline uint32_t __attribute__((always_inline))
 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
@@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0, vb_offset = 0;
+		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,39 +130,81 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
 		 * placed in separate buffers.
 		 */
-		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
+		if ((desc->flags & VRING_DESC_F_NEXT) &&
+			(desc->len == vq->vhost_hlen)) {
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			vb_offset += vq->vhost_hlen;
+			hdr = 1;
 		}
 
+		pkt_len = rte_pktmbuf_pkt_len(buff);
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (total_copied < pkt_len) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
+				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+			vb_offset += len_to_cpy;
+			total_copied += len_to_cpy;
+
+			/* The whole packet completes */
+			if (total_copied == pkt_len)
+				break;
+
+			/* The current segment completes */
+			if (offset == data_len) {
+				buff = buff->next;
+				offset = 0;
+				data_len = rte_pktmbuf_data_len(buff);
+			}
+
+			/* The current vring descriptor done */
+			if (vb_offset == desc->len) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					vb_offset = 0;
+				} else {
+					/* Room in vring buffer is not enough */
+					uncompleted_pkt = 1;
+					break;
+				}
+			}
+			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
+		};
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							vq->vhost_hlen;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							pkt_len + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -589,7 +636,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -608,8 +662,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v6 2/4] lib_vhost: Refine code style
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-06-08  3:18           ` Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 3/4] lib_vhost: Extract function Ouyang Changchun
                             ` (2 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-08  3:18 UTC (permalink / raw)
  To: dev; +Cc: root

Remove unnecessary new line.

Signed-off-by: root <root@dpdk20.sh.intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index b887e0b..1f145bf 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -265,8 +265,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * (guest physical addr -> vhost virtual addr)
 	 */
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -284,8 +283,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
@@ -308,8 +306,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v6 3/4] lib_vhost: Extract function
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 2/4] lib_vhost: Refine code style Ouyang Changchun
@ 2015-06-08  3:18           ` Ouyang Changchun
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-08  3:18 UTC (permalink / raw)
  To: dev; +Cc: root

Extract code into 2 common functions:
update_secure_len, which is used to accumulate the buffer length from the vring descriptors,
and fill_buf_vec, which is used to fill struct buf_vec.
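
Illustrative sketch only, simplified from the reservation loop in this patch: how the merged helper is meant to be used to reserve enough guest buffers for one packet. Error handling and the avail-index check are omitted; vq and pkt_len are assumed to be set up as in virtio_dev_merge_rx.

    uint32_t secure_len = 0;
    uint32_t vec_idx = 0;
    uint16_t res_cur_idx = vq->last_used_idx_res;

    do {
    	/* Walk one available entry and its chained descriptors,
    	 * accumulating their lengths into secure_len and recording
    	 * each of them in vq->buf_vec[]. */
    	update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx);
    	res_cur_idx++;
    } while (pkt_len > secure_len);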

Changes in v5
  - merge fill_buf_vec into update_secure_len
  - do both tasks in one-time loop

Signed-off-by: root <root@dpdk20.sh.intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 85 ++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 49 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 1f145bf..aaf77ed 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -436,6 +436,34 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t id,
+	uint32_t *secure_len, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t len = *secure_len;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+	*vec_idx = vec_id;
+}
+
 /*
  * This function works for mergeable RX.
  */
@@ -445,8 +473,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, entry_success = 0;
-	uint16_t avail_idx, res_cur_idx;
-	uint16_t res_base_idx, res_end_idx;
+	uint16_t avail_idx;
+	uint16_t res_base_idx, res_cur_idx;
 	uint8_t success = 0;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
@@ -462,17 +490,16 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		return 0;
 
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t secure_len = 0;
-		uint16_t need_cnt;
-		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
 
 		do {
 			/*
 			 * As many data cores may want access to available
 			 * buffers, they need to be reserved.
 			 */
+			uint32_t secure_len = 0;
+			uint32_t vec_idx = 0;
+
 			res_base_idx = vq->last_used_idx_res;
 			res_cur_idx = res_base_idx;
 
@@ -486,22 +513,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 						dev->device_fh);
 					return pkt_idx;
 				} else {
-					uint16_t wrapped_idx =
-						(res_cur_idx) & (vq->size - 1);
-					uint32_t idx =
-						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
-
+					update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx);
 					res_cur_idx++;
 				}
 			} while (pkt_len > secure_len);
@@ -512,33 +524,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
-		need_cnt = res_cur_idx - res_base_idx;
-
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
-
-		res_end_idx = res_cur_idx;
-
 		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-			res_end_idx, pkts[pkt_idx]);
+			res_cur_idx, pkts[pkt_idx]);
 
 		rte_compiler_barrier();
 
@@ -550,7 +537,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			rte_pause();
 
 		*(volatile uint16_t *)&vq->used->idx += entry_success;
-		vq->last_used_idx = res_end_idx;
+		vq->last_used_idx = res_cur_idx;
 
 		/* flush used->idx update before we read avail->flags. */
 		rte_mb();
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v6 4/4] lib_vhost: Remove unnecessary vring descriptor length updating
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
                             ` (2 preceding siblings ...)
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 3/4] lib_vhost: Extract function Ouyang Changchun
@ 2015-06-08  3:18           ` Ouyang Changchun
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  4 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-08  3:18 UTC (permalink / raw)
  To: dev; +Cc: root

Remove the unnecessary vring descriptor length updates; vhost should not change them.
The virtio front end should assign the value to desc.len for both RX and TX.
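
Illustrative sketch only, mirroring the code this patch keeps: vhost reports how many bytes it wrote through the used ring entry and leaves desc->len untouched. The local variable slot is introduced here just for the sketch.

    /* Report completion via the used ring only; desc->len stays as
     * the front end set it. */
    uint16_t slot = cur_idx & (vq->size - 1);
    vq->used->ring[slot].id = vq->buf_vec[vec_idx].desc_idx;
    vq->used->ring[slot].len = entry_len; /* includes the virtio header */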

Signed-off-by: root <root@dpdk20.sh.intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index aaf77ed..07bc16c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -290,7 +290,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	if (vb_avail == 0) {
 		uint32_t desc_idx =
 			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
 
 		if ((vq->desc[desc_idx].flags
 			& VRING_DESC_F_NEXT) == 0) {
@@ -374,7 +373,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 					 */
 					uint32_t desc_idx =
 						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
 
 					if ((vq->desc[desc_idx].flags &
 						VRING_DESC_F_NEXT) == 0) {
@@ -409,26 +407,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id
 					= vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len
 					= entry_len;
-				entry_len = 0;
-				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
  2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
                             ` (3 preceding siblings ...)
  2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
@ 2015-06-09  1:03           ` Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
                               ` (6 more replies)
  4 siblings, 7 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-09  1:03 UTC (permalink / raw)
  To: dev

Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v7 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
@ 2015-06-09  1:03             ` Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 2/4] lib_vhost: Refine code style Ouyang Changchun
                               ` (5 subsequent siblings)
  6 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-09  1:03 UTC (permalink / raw)
  To: dev

Vring enqueue needs to consider 2 cases:
 1. separate descriptors contain the virtio header and the actual data, e.g. the first descriptor
    is for the virtio header, followed by descriptors for the actual data;
 2. the virtio header and some data are put together in one descriptor, e.g. the first descriptor contains both
    the virtio header and part of the actual data, followed by more descriptors for the rest of the packet data;
    the current DPDK-based virtio-net PMD implementation is this case.

The same applies to vring dequeue: it should not assume the vring descriptor is chained or not chained, it should use
desc->flags to check whether it is chained. This patch also fixes a TX corruption issue when vhost
works with a virtio-net driver which uses one single vring descriptor (header and data are in one descriptor)
for the virtio TX process by default.

Changes in v6
  - move desc->len change to here to increase code readability

Changes in v5
  - support virtio header with partial data in first descriptor and then followed by descriptor for rest data

Changes in v4
  - remove unnecessary check for mbuf 'next' pointer
  - refine packet copying completeness check

Changes in v3
  - support scattered mbuf, check the mbuf has 'next' pointer or not and copy all segments to vring buffer.

Changes in v2
  - drop the uncompleted packet
  - refine code logic

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 90 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 19 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 4809d32..b887e0b 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -46,7 +46,8 @@
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
  * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
  */
 static inline uint32_t __attribute__((always_inline))
 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
@@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0, vb_offset = 0;
+		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,39 +130,81 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
 		 * placed in separate buffers.
 		 */
-		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
+		if ((desc->flags & VRING_DESC_F_NEXT) &&
+			(desc->len == vq->vhost_hlen)) {
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			vb_offset += vq->vhost_hlen;
+			hdr = 1;
 		}
 
+		pkt_len = rte_pktmbuf_pkt_len(buff);
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (total_copied < pkt_len) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
+				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+			vb_offset += len_to_cpy;
+			total_copied += len_to_cpy;
+
+			/* The whole packet completes */
+			if (total_copied == pkt_len)
+				break;
+
+			/* The current segment completes */
+			if (offset == data_len) {
+				buff = buff->next;
+				offset = 0;
+				data_len = rte_pktmbuf_data_len(buff);
+			}
+
+			/* The current vring descriptor done */
+			if (vb_offset == desc->len) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					vb_offset = 0;
+				} else {
+					/* Room in vring buffer is not enough */
+					uncompleted_pkt = 1;
+					break;
+				}
+			}
+			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
+		};
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							vq->vhost_hlen;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							pkt_len + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -589,7 +636,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -608,8 +662,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v7 2/4] lib_vhost: Refine code style
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
@ 2015-06-09  1:03             ` Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 3/4] lib_vhost: Extract function Ouyang Changchun
                               ` (4 subsequent siblings)
  6 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-09  1:03 UTC (permalink / raw)
  To: dev

Remove unnecessary new line.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index b887e0b..1f145bf 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -265,8 +265,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	 * (guest physical addr -> vhost virtual addr)
 	 */
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -284,8 +283,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
@@ -308,8 +306,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v7 3/4] lib_vhost: Extract function
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 2/4] lib_vhost: Refine code style Ouyang Changchun
@ 2015-06-09  1:03             ` Ouyang Changchun
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
                               ` (3 subsequent siblings)
  6 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-09  1:03 UTC (permalink / raw)
  To: dev

Extract code into 2 common functions:
update_secure_len, which is used to accumulate the buffer length from the vring descriptors,
and fill_buf_vec, which is used to fill struct buf_vec.

Changes in v5
  - merge fill_buf_vec into update_secure_len
  - do both tasks in one-time loop

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 85 ++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 49 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 1f145bf..aaf77ed 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -436,6 +436,34 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t id,
+	uint32_t *secure_len, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t len = *secure_len;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+	*vec_idx = vec_id;
+}
+
 /*
  * This function works for mergeable RX.
  */
@@ -445,8 +473,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, entry_success = 0;
-	uint16_t avail_idx, res_cur_idx;
-	uint16_t res_base_idx, res_end_idx;
+	uint16_t avail_idx;
+	uint16_t res_base_idx, res_cur_idx;
 	uint8_t success = 0;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
@@ -462,17 +490,16 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		return 0;
 
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t secure_len = 0;
-		uint16_t need_cnt;
-		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
 
 		do {
 			/*
 			 * As many data cores may want access to available
 			 * buffers, they need to be reserved.
 			 */
+			uint32_t secure_len = 0;
+			uint32_t vec_idx = 0;
+
 			res_base_idx = vq->last_used_idx_res;
 			res_cur_idx = res_base_idx;
 
@@ -486,22 +513,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 						dev->device_fh);
 					return pkt_idx;
 				} else {
-					uint16_t wrapped_idx =
-						(res_cur_idx) & (vq->size - 1);
-					uint32_t idx =
-						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
-
+					update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx);
 					res_cur_idx++;
 				}
 			} while (pkt_len > secure_len);
@@ -512,33 +524,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 							res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
-		need_cnt = res_cur_idx - res_base_idx;
-
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
-
-		res_end_idx = res_cur_idx;
-
 		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-			res_end_idx, pkts[pkt_idx]);
+			res_cur_idx, pkts[pkt_idx]);
 
 		rte_compiler_barrier();
 
@@ -550,7 +537,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			rte_pause();
 
 		*(volatile uint16_t *)&vq->used->idx += entry_success;
-		vq->last_used_idx = res_end_idx;
+		vq->last_used_idx = res_cur_idx;
 
 		/* flush used->idx update before we read avail->flags. */
 		rte_mb();
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v7 4/4] lib_vhost: Remove unnecessary vring descriptor length updating
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                               ` (2 preceding siblings ...)
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 3/4] lib_vhost: Extract function Ouyang Changchun
@ 2015-06-09  1:03             ` Ouyang Changchun
  2015-06-10  1:40             ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Xie, Huawei
                               ` (2 subsequent siblings)
  6 siblings, 0 replies; 52+ messages in thread
From: Ouyang Changchun @ 2015-06-09  1:03 UTC (permalink / raw)
  To: dev

Remove the unnecessary vring descriptor length updates; vhost should not change them.
The virtio front end should assign the value to desc.len for both RX and TX.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index aaf77ed..07bc16c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -290,7 +290,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 	if (vb_avail == 0) {
 		uint32_t desc_idx =
 			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
 
 		if ((vq->desc[desc_idx].flags
 			& VRING_DESC_F_NEXT) == 0) {
@@ -374,7 +373,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 					 */
 					uint32_t desc_idx =
 						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
 
 					if ((vq->desc[desc_idx].flags &
 						VRING_DESC_F_NEXT) == 0) {
@@ -409,26 +407,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id
 					= vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len
 					= entry_len;
-				entry_len = 0;
-				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                               ` (3 preceding siblings ...)
  2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
@ 2015-06-10  1:40             ` Xie, Huawei
  2015-06-10  6:49             ` Xie, Huawei
  2015-06-15  9:42             ` Thomas Monjalon
  6 siblings, 0 replies; 52+ messages in thread
From: Xie, Huawei @ 2015-06-10  1:40 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

Acked-by:  Huawei Xie <huawei.xie@intel.com>
On 6/9/2015 9:03 AM, Ouyang, Changchun wrote:
> Fix enqueue/dequeue can't handle chained vring descriptors;
> Remove unnecessary vring descriptor length updating;
> Add support copying scattered mbuf to vring;
>
> Changchun Ouyang (4):
>   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
>   lib_vhost: Refine code style
>   lib_vhost: Extract function
>   lib_vhost: Remove unnecessary vring descriptor length updating
>
>  lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
>  1 file changed, 111 insertions(+), 90 deletions(-)
>


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                               ` (4 preceding siblings ...)
  2015-06-10  1:40             ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Xie, Huawei
@ 2015-06-10  6:49             ` Xie, Huawei
  2015-06-17 14:57               ` Thomas Monjalon
  2015-06-15  9:42             ` Thomas Monjalon
  6 siblings, 1 reply; 52+ messages in thread
From: Xie, Huawei @ 2015-06-10  6:49 UTC (permalink / raw)
  To: Ouyang, Changchun, dev

Acked-by: Huawei Xie <huawei.xie@intel.com>

On 6/9/2015 9:03 AM, Ouyang, Changchun wrote:
> Fix enqueue/dequeue can't handle chained vring descriptors;
> Remove unnecessary vring descriptor length updating;
> Add support copying scattered mbuf to vring;
>
> Changchun Ouyang (4):
>   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
>   lib_vhost: Refine code style
>   lib_vhost: Extract function
>   lib_vhost: Remove unnecessary vring descriptor length updating
>
>  lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
>  1 file changed, 111 insertions(+), 90 deletions(-)
>


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
  2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
                               ` (5 preceding siblings ...)
  2015-06-10  6:49             ` Xie, Huawei
@ 2015-06-15  9:42             ` Thomas Monjalon
  2015-06-16  1:01               ` Ouyang, Changchun
  6 siblings, 1 reply; 52+ messages in thread
From: Thomas Monjalon @ 2015-06-15  9:42 UTC (permalink / raw)
  To: Ouyang Changchun; +Cc: dev

2015-06-09 09:03, Ouyang Changchun:
> Fix enqueue/dequeue can't handle chained vring descriptors;
> Remove unnecessary vring descriptor length updating;
> Add support copying scattered mbuf to vring;
> 
> Changchun Ouyang (4):
>   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
>   lib_vhost: Refine code style
>   lib_vhost: Extract function
>   lib_vhost: Remove unnecessary vring descriptor length updating

What changed in v7?
Is this test report still valuable for v7?
	http://dpdk.org/ml/archives/dev/2015-June/018610.html

Note: it's really convenient to put the relevant changelog in each commit,
and it would be nicer to have a changelog summary in this cover letter.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
  2015-06-15  9:42             ` Thomas Monjalon
@ 2015-06-16  1:01               ` Ouyang, Changchun
  0 siblings, 0 replies; 52+ messages in thread
From: Ouyang, Changchun @ 2015-06-16  1:01 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

Hi, Thomas


> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, June 15, 2015 5:43 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
> 
> 2015-06-09 09:03, Ouyang Changchun:
> > Fix enqueue/dequeue can't handle chained vring descriptors; Remove
> > unnecessary vring descriptor length updating; Add support copying
> > scattered mbuf to vring;
> >
> > Changchun Ouyang (4):
> >   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
> >   lib_vhost: Refine code style
> >   lib_vhost: Extract function
> >   lib_vhost: Remove unnecessary vring descriptor length updating
> 
> What changed in v7?
> Is this test report still valuable for v7?
> 	http://dpdk.org/ml/archives/dev/2015-June/018610.html
> 
Nothing really changed from v6 to v7.
In v6 it was signed-off-by root;
in v7 it is signed-off-by myself.
Yes, the test report is still valuable.

> Note: it's really convenient to put the relevant changelog in each commit,
> and it would be nicer to have a changelog summary in this cover letter.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
  2015-06-10  6:49             ` Xie, Huawei
@ 2015-06-17 14:57               ` Thomas Monjalon
  0 siblings, 0 replies; 52+ messages in thread
From: Thomas Monjalon @ 2015-06-17 14:57 UTC (permalink / raw)
  To: Ouyang, Changchun; +Cc: dev

2015-06-10 06:49, Xie, Huawei:
> On 6/9/2015 9:03 AM, Ouyang, Changchun wrote:
> > Fix enqueue/dequeue can't handle chained vring descriptors;
> > Remove unnecessary vring descriptor length updating;
> > Add support copying scattered mbuf to vring;
> >
> > Changchun Ouyang (4):
> >   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
> >   lib_vhost: Refine code style
> >   lib_vhost: Extract function
> >   lib_vhost: Remove unnecessary vring descriptor length updating
> 
> Acked-by: Huawei Xie <huawei.xie@intel.com>

Applied, thanks

^ permalink raw reply	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2015-06-17 14:58 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-04  6:26 [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-05-12 10:00 ` Thomas Monjalon
2015-05-18  9:39 ` Xie, Huawei
2015-05-18 13:23   ` Ouyang, Changchun
2015-05-20  5:26     ` Xie, Huawei
2015-05-28 15:16 ` [dpdk-dev] [PATCH v2 0/5] Fix vhost enqueue/dequeue issue Ouyang Changchun
2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-05-31  5:03     ` Xie, Huawei
2015-05-31 13:20       ` Ouyang, Changchun
2015-05-31  8:40     ` Xie, Huawei
2015-05-31 12:59       ` Ouyang, Changchun
2015-05-31 13:22         ` Ouyang, Changchun
2015-05-31 13:33       ` Ouyang, Changchun
2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 2/5] lib_vhost: Refine code style Ouyang Changchun
2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 3/5] lib_vhost: Extract function Ouyang Changchun
2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 4/5] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
2015-05-28 15:16   ` [dpdk-dev] [PATCH v2 5/5] lib_vhost: Add support copying scattered mbuf to vring Ouyang Changchun
2015-05-31  9:10     ` Xie, Huawei
2015-05-31 13:07       ` Ouyang, Changchun
2015-06-01  8:25   ` [dpdk-dev] [PATCH v3 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-06-02  7:51       ` Xie, Huawei
2015-06-02  8:10         ` Ouyang, Changchun
2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 2/4] lib_vhost: Refine code style Ouyang Changchun
2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 3/4] lib_vhost: Extract function Ouyang Changchun
2015-06-01  8:25     ` [dpdk-dev] [PATCH v3 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
2015-06-02  8:51     ` [dpdk-dev] [PATCH v4 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 2/4] lib_vhost: Refine code style Ouyang Changchun
2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 3/4] lib_vhost: Extract function Ouyang Changchun
2015-06-02  8:51       ` [dpdk-dev] [PATCH v4 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
2015-06-03  6:02       ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 2/4] lib_vhost: Refine code style Ouyang Changchun
2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 3/4] lib_vhost: Extract function Ouyang Changchun
2015-06-03  6:02         ` [dpdk-dev] [PATCH v5 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
2015-06-03  7:50         ` [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue Xu, Qian Q
2015-06-08  3:18         ` [dpdk-dev] [PATCH v6 " Ouyang Changchun
2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 2/4] lib_vhost: Refine code style Ouyang Changchun
2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 3/4] lib_vhost: Extract function Ouyang Changchun
2015-06-08  3:18           ` [dpdk-dev] [PATCH v6 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
2015-06-09  1:03           ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Ouyang Changchun
2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 1/4] lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors Ouyang Changchun
2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 2/4] lib_vhost: Refine code style Ouyang Changchun
2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 3/4] lib_vhost: Extract function Ouyang Changchun
2015-06-09  1:03             ` [dpdk-dev] [PATCH v7 4/4] lib_vhost: Remove unnecessary vring descriptor length updating Ouyang Changchun
2015-06-10  1:40             ` [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue Xie, Huawei
2015-06-10  6:49             ` Xie, Huawei
2015-06-17 14:57               ` Thomas Monjalon
2015-06-15  9:42             ` Thomas Monjalon
2015-06-16  1:01               ` Ouyang, Changchun

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).