DPDK patches and discussions
* [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors
@ 2018-07-06  7:04 Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path Maxime Coquelin
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:04 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev; +Cc: Maxime Coquelin

This series is again preliminary work to ease packed ring
layout integration.

The main changes are using buffer vectors also in the dequeue
path, and performing the IOVA to HVA translation at vector fill
time.
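
As an illustration only (a sketch, with field names taken from the
buf_vector structure touched by patch 2), each vector entry now carries
both the guest IOVA and the host virtual address resolved when the
vector is filled:

    struct buf_vector {
            uint64_t buf_iova; /* guest IOVA, kept for dirty page logging
                                * and the zero-copy GPA-to-HPA lookup */
            uint64_t buf_addr; /* host VA, translated once at fill time so
                                * the copy loops need no extra translation */
            uint32_t buf_len;
            uint32_t desc_idx;
    };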

I still have to run more benchmarks, but PVP benchmarks do
not show any performance change.

A good thing is that it saves a further ~140 lines of code.

Changes since v3:
=================
- Fix dequeue_zero_copy last_used_idx update (Tiwei)
- Remove "vhost: make gpa to hpa failure an error" patch (Tiwei)

Changes since v2:
=================
 - check vec_id doesn't overflow (Tiwei)
 - Fix perm parameters passed to fill_vec_buf (Tiwei)
 - Remove extra space in variable assignment (Tiwei)


Maxime Coquelin (5):
  vhost: use shadow used ring in dequeue path
  vhost: use buffer vectors in dequeue path
  vhost: improve prefetching in dequeue path
  vhost: prefetch first descriptor in dequeue path
  vhost: improve prefetching in enqueue path

 lib/librte_vhost/vhost.h      |   1 +
 lib/librte_vhost/virtio_net.c | 517 ++++++++++++++++--------------------------
 2 files changed, 193 insertions(+), 325 deletions(-)

-- 
2.14.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
@ 2018-07-06  7:04 ` Maxime Coquelin
  2018-07-06  7:59   ` Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors " Maxime Coquelin
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:04 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev; +Cc: Maxime Coquelin

Relax used ring contention by reusing the shadow used
ring feature already used by the enqueue path.
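
For context, the pattern being adopted is roughly the following (a
sketch built from the existing helpers; the loop, count and head_idx[]
are illustrative only, not the actual code):

    uint16_t i;

    vq->shadow_used_idx = 0;
    for (i = 0; i < count; i++)
            /* stage each completed descriptor in the shadow ring instead
             * of writing vq->used->ring for every single packet */
            update_shadow_used_ring(vq, head_idx[i], 0);

    /* one batched write-back to the used ring, then notify the guest */
    flush_shadow_used_ring(dev, vq);
    vhost_vring_call(dev, vq);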

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 50 +++++++++----------------------------------
 1 file changed, 10 insertions(+), 40 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 98ad8e936..741267345 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1019,35 +1019,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return error;
 }
 
-static __rte_always_inline void
-update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		 uint32_t used_idx, uint32_t desc_idx)
-{
-	vq->used->ring[used_idx].id  = desc_idx;
-	vq->used->ring[used_idx].len = 0;
-	vhost_log_cache_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
-}
-
-static __rte_always_inline void
-update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		uint32_t count)
-{
-	if (unlikely(count == 0))
-		return;
-
-	rte_smp_wmb();
-	rte_smp_rmb();
-
-	vhost_log_cache_sync(dev, vq);
-
-	vq->used->idx += count;
-	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
-			sizeof(vq->used->idx));
-	vhost_vring_call(dev, vq);
-}
-
 static __rte_always_inline struct zcopy_mbuf *
 get_zmbuf(struct vhost_virtqueue *vq)
 {
@@ -1115,7 +1086,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mbuf *rarp_mbuf = NULL;
 	struct vhost_virtqueue *vq;
 	uint32_t desc_indexes[MAX_PKT_BURST];
-	uint32_t used_idx;
 	uint32_t i = 0;
 	uint16_t free_entries;
 	uint16_t avail_idx;
@@ -1146,6 +1116,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		goto out_access_unlock;
 
 	vq->batch_copy_nb_elems = 0;
+	vq->shadow_used_idx = 0;
 
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_lock(vq);
@@ -1163,9 +1134,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			next = TAILQ_NEXT(zmbuf, next);
 
 			if (mbuf_is_consumed(zmbuf->mbuf)) {
-				used_idx = vq->last_used_idx++ & (vq->size - 1);
-				update_used_ring(dev, vq, used_idx,
-						 zmbuf->desc_idx);
+				update_shadow_used_ring(vq, zmbuf->desc_idx, 0);
 				nr_updated += 1;
 
 				TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
@@ -1176,7 +1145,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			}
 		}
 
-		update_used_idx(dev, vq, nr_updated);
+		flush_shadow_used_ring(dev, vq);
+		vhost_vring_call(dev, vq);
+		vq->shadow_used_idx = 0;
 	}
 
 	/*
@@ -1217,9 +1188,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	/* Prefetch available and used ring */
 	avail_idx = vq->last_avail_idx & (vq->size - 1);
-	used_idx  = vq->last_used_idx  & (vq->size - 1);
 	rte_prefetch0(&vq->avail->ring[avail_idx]);
-	rte_prefetch0(&vq->used->ring[used_idx]);
 
 	count = RTE_MIN(count, MAX_PKT_BURST);
 	count = RTE_MIN(count, free_entries);
@@ -1229,11 +1198,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (i = 0; i < count; i++) {
 		avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
-		used_idx  = (vq->last_used_idx  + i) & (vq->size - 1);
 		desc_indexes[i] = vq->avail->ring[avail_idx];
 
 		if (likely(dev->dequeue_zero_copy == 0))
-			update_used_ring(dev, vq, used_idx, desc_indexes[i]);
+			update_shadow_used_ring(vq, desc_indexes[i], 0);
 	}
 
 	/* Prefetch descriptor index. */
@@ -1326,8 +1294,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	if (likely(dev->dequeue_zero_copy == 0)) {
 		do_data_copy_dequeue(vq);
-		vq->last_used_idx += i;
-		update_used_idx(dev, vq, i);
+		if (unlikely(i < count))
+			vq->shadow_used_idx = i;
+		flush_shadow_used_ring(dev, vq);
+		vhost_vring_call(dev, vq);
 	}
 
 out:
-- 
2.14.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors in dequeue path
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path Maxime Coquelin
@ 2018-07-06  7:04 ` Maxime Coquelin
       [not found]   ` <2DBBFF226F7CF64BAFCA79B681719D953A4EB9E3@SHSMSX101.ccr.corp.intel.com>
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 3/5] vhost: improve prefetching " Maxime Coquelin
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:04 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev; +Cc: Maxime Coquelin

To ease packed ring layout integration, this patch makes
the dequeue path re-use the buffer vectors implemented for
the enqueue path.

With this change, copy_desc_to_mbuf() becomes agnostic to
the ring layout type.
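
Put differently (a rough sketch, not the actual function body;
copy_chunk_to_mbuf() below is a hypothetical placeholder for the real
copy logic), the dequeue copy loop now only walks pre-translated vector
entries and never dereferences vring descriptors itself:

    while (vec_idx < nr_vec) {
            /* host VA and length were already resolved by fill_vec_buf(),
             * so this loop works for any ring layout */
            uint64_t buf_addr = buf_vec[vec_idx].buf_addr;
            uint32_t buf_len = buf_vec[vec_idx].buf_len;

            copy_chunk_to_mbuf(m, buf_addr, buf_len);
            vec_idx++;
    }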

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.h      |   1 +
 lib/librte_vhost/virtio_net.c | 451 ++++++++++++++++--------------------------
 2 files changed, 167 insertions(+), 285 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3437b996b..79e3117d2 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -43,6 +43,7 @@
  * from vring to do scatter RX.
  */
 struct buf_vector {
+	uint64_t buf_iova;
 	uint64_t buf_addr;
 	uint32_t buf_len;
 	uint32_t desc_idx;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 741267345..6339296c7 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -225,12 +225,12 @@ static __rte_always_inline int
 fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			 uint32_t avail_idx, uint32_t *vec_idx,
 			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
-			 uint16_t *desc_chain_len)
+			 uint16_t *desc_chain_len, uint8_t perm)
 {
 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
 	uint32_t vec_id = *vec_idx;
 	uint32_t len    = 0;
-	uint64_t dlen;
+	uint64_t dlen, desc_avail, desc_iova;
 	struct vring_desc *descs = vq->desc;
 	struct vring_desc *idesc = NULL;
 
@@ -261,16 +261,43 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	}
 
 	while (1) {
-		if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+		if (unlikely(idx >= vq->size)) {
 			free_ind_table(idesc);
 			return -1;
 		}
 
+
 		len += descs[idx].len;
-		buf_vec[vec_id].buf_addr = descs[idx].addr;
-		buf_vec[vec_id].buf_len  = descs[idx].len;
-		buf_vec[vec_id].desc_idx = idx;
-		vec_id++;
+		desc_avail = descs[idx].len;
+		desc_iova = descs[idx].addr;
+
+		while (desc_avail) {
+			uint64_t desc_addr;
+			uint64_t desc_chunck_len = desc_avail;
+
+			if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+				free_ind_table(idesc);
+				return -1;
+			}
+
+			desc_addr = vhost_iova_to_vva(dev, vq,
+					desc_iova,
+					&desc_chunck_len,
+					perm);
+			if (unlikely(!desc_addr)) {
+				free_ind_table(idesc);
+				return -1;
+			}
+
+			buf_vec[vec_id].buf_iova = desc_iova;
+			buf_vec[vec_id].buf_addr = desc_addr;
+			buf_vec[vec_id].buf_len  = desc_chunck_len;
+			buf_vec[vec_id].desc_idx = idx;
+
+			desc_avail -= desc_chunck_len;
+			desc_iova += desc_chunck_len;
+			vec_id++;
+		}
 
 		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
 			break;
@@ -293,7 +320,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 static inline int
 reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint32_t size, struct buf_vector *buf_vec,
-				uint16_t *num_buffers, uint16_t avail_head)
+				uint16_t *num_buffers, uint16_t avail_head,
+				uint16_t *nr_vec)
 {
 	uint16_t cur_idx;
 	uint32_t vec_idx = 0;
@@ -315,7 +343,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			return -1;
 
 		if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
-						&head_idx, &len) < 0))
+						&head_idx, &len,
+						VHOST_ACCESS_RW) < 0))
 			return -1;
 		len = RTE_MIN(len, size);
 		update_shadow_used_ring(vq, head_idx, len);
@@ -334,21 +363,22 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			return -1;
 	}
 
+	*nr_vec = vec_idx;
+
 	return 0;
 }
 
 static __rte_always_inline int
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			    struct rte_mbuf *m, struct buf_vector *buf_vec,
-			    uint16_t num_buffers)
+			    uint16_t nr_vec, uint16_t num_buffers)
 {
 	uint32_t vec_idx = 0;
-	uint64_t desc_addr, desc_gaddr;
 	uint32_t mbuf_offset, mbuf_avail;
-	uint32_t desc_offset, desc_avail;
+	uint32_t buf_offset, buf_avail;
+	uint64_t buf_addr, buf_iova, buf_len;
 	uint32_t cpy_len;
-	uint64_t desc_chunck_len;
-	uint64_t hdr_addr, hdr_phys_addr;
+	uint64_t hdr_addr;
 	struct rte_mbuf *hdr_mbuf;
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
@@ -359,82 +389,57 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		goto out;
 	}
 
-	desc_chunck_len = buf_vec[vec_idx].buf_len;
-	desc_gaddr = buf_vec[vec_idx].buf_addr;
-	desc_addr = vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RW);
-	if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+	buf_addr = buf_vec[vec_idx].buf_addr;
+	buf_iova = buf_vec[vec_idx].buf_iova;
+	buf_len = buf_vec[vec_idx].buf_len;
+
+	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
 		error = -1;
 		goto out;
 	}
 
 	hdr_mbuf = m;
-	hdr_addr = desc_addr;
-	if (unlikely(desc_chunck_len < dev->vhost_hlen))
+	hdr_addr = buf_addr;
+	if (unlikely(buf_len < dev->vhost_hlen))
 		hdr = &tmp_hdr;
 	else
 		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
-	hdr_phys_addr = desc_gaddr;
 	rte_prefetch0((void *)(uintptr_t)hdr_addr);
 
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
 		dev->vid, num_buffers);
 
-	desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
-	if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
-		desc_chunck_len = desc_avail;
-		desc_gaddr += dev->vhost_hlen;
-		desc_addr = vhost_iova_to_vva(dev, vq,
-				desc_gaddr,
-				&desc_chunck_len,
-				VHOST_ACCESS_RW);
-		if (unlikely(!desc_addr)) {
-			error = -1;
-			goto out;
-		}
-
-		desc_offset = 0;
+	if (unlikely(buf_len < dev->vhost_hlen)) {
+		buf_offset = dev->vhost_hlen - buf_len;
+		vec_idx++;
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
+		buf_avail = buf_len - buf_offset;
 	} else {
-		desc_offset = dev->vhost_hlen;
-		desc_chunck_len -= dev->vhost_hlen;
+		buf_offset = dev->vhost_hlen;
+		buf_avail = buf_len - dev->vhost_hlen;
 	}
 
-
 	mbuf_avail  = rte_pktmbuf_data_len(m);
 	mbuf_offset = 0;
 	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current desc buf, get the next one */
-		if (desc_avail == 0) {
+		/* done with current buf, get the next one */
+		if (buf_avail == 0) {
 			vec_idx++;
-			desc_chunck_len = buf_vec[vec_idx].buf_len;
-			desc_gaddr = buf_vec[vec_idx].buf_addr;
-			desc_addr =
-				vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RW);
-			if (unlikely(!desc_addr)) {
+			if (unlikely(vec_idx >= nr_vec)) {
 				error = -1;
 				goto out;
 			}
 
+			buf_addr = buf_vec[vec_idx].buf_addr;
+			buf_iova = buf_vec[vec_idx].buf_iova;
+			buf_len = buf_vec[vec_idx].buf_len;
+
 			/* Prefetch buffer address. */
-			rte_prefetch0((void *)(uintptr_t)desc_addr);
-			desc_offset = 0;
-			desc_avail  = buf_vec[vec_idx].buf_len;
-		} else if (unlikely(desc_chunck_len == 0)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += desc_offset;
-			desc_addr = vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len, VHOST_ACCESS_RW);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-			desc_offset = 0;
+			rte_prefetch0((void *)(uintptr_t)buf_addr);
+			buf_offset = 0;
+			buf_avail  = buf_len;
 		}
 
 		/* done with current mbuf, get the next one */
@@ -455,18 +460,12 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint64_t len;
 				uint64_t remain = dev->vhost_hlen;
 				uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
-				uint64_t guest_addr = hdr_phys_addr;
+				uint64_t iova = buf_vec[0].buf_iova;
+				uint16_t hdr_vec_idx = 0;
 
 				while (remain) {
 					len = remain;
-					dst = vhost_iova_to_vva(dev, vq,
-							guest_addr, &len,
-							VHOST_ACCESS_RW);
-					if (unlikely(!dst || !len)) {
-						error = -1;
-						goto out;
-					}
-
+					dst = buf_vec[hdr_vec_idx].buf_addr;
 					rte_memcpy((void *)(uintptr_t)dst,
 							(void *)(uintptr_t)src,
 							len);
@@ -474,50 +473,50 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 					PRINT_PACKET(dev, (uintptr_t)dst,
 							(uint32_t)len, 0);
 					vhost_log_cache_write(dev, vq,
-							guest_addr, len);
+							iova, len);
 
 					remain -= len;
-					guest_addr += len;
+					iova += len;
 					src += len;
+					hdr_vec_idx++;
 				}
 			} else {
 				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
 						dev->vhost_hlen, 0);
-				vhost_log_cache_write(dev, vq, hdr_phys_addr,
+				vhost_log_cache_write(dev, vq,
+						buf_vec[0].buf_iova,
 						dev->vhost_hlen);
 			}
 
 			hdr_addr = 0;
 		}
 
-		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+		cpy_len = RTE_MIN(buf_len, mbuf_avail);
 
 		if (likely(cpy_len > MAX_BATCH_LEN ||
 					vq->batch_copy_nb_elems >= vq->size)) {
-			rte_memcpy((void *)((uintptr_t)(desc_addr +
-							desc_offset)),
+			rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 				cpy_len);
-			vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+			vhost_log_cache_write(dev, vq, buf_iova + buf_offset,
 					cpy_len);
-			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+			PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
 				cpy_len, 0);
 		} else {
 			batch_copy[vq->batch_copy_nb_elems].dst =
-				(void *)((uintptr_t)(desc_addr + desc_offset));
+				(void *)((uintptr_t)(buf_addr + buf_offset));
 			batch_copy[vq->batch_copy_nb_elems].src =
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
 			batch_copy[vq->batch_copy_nb_elems].log_addr =
-				desc_gaddr + desc_offset;
+				buf_iova + buf_offset;
 			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
 			vq->batch_copy_nb_elems++;
 		}
 
 		mbuf_avail  -= cpy_len;
 		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_offset += cpy_len;
-		desc_chunck_len -= cpy_len;
+		buf_avail  -= cpy_len;
+		buf_offset += cpy_len;
 	}
 
 out:
@@ -568,10 +567,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	avail_head = *((volatile uint16_t *)&vq->avail->idx);
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+		uint16_t nr_vec = 0;
 
 		if (unlikely(reserve_avail_buf(dev, vq,
 						pkt_len, buf_vec, &num_buffers,
-						avail_head) < 0)) {
+						avail_head, &nr_vec) < 0)) {
 			VHOST_LOG_DEBUG(VHOST_DATA,
 				"(%d) failed to get enough desc from vring\n",
 				dev->vid);
@@ -584,7 +584,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			vq->last_avail_idx + num_buffers);
 
 		if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
-						buf_vec, num_buffers) < 0) {
+						buf_vec, nr_vec,
+						num_buffers) < 0) {
 			vq->shadow_used_idx -= num_buffers;
 			break;
 		}
@@ -750,49 +751,40 @@ put_zmbuf(struct zcopy_mbuf *zmbuf)
 
 static __rte_always_inline int
 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct vring_desc *descs, uint16_t max_desc,
-		  struct rte_mbuf *m, uint16_t desc_idx,
-		  struct rte_mempool *mbuf_pool)
+		  struct buf_vector *buf_vec, uint16_t nr_vec,
+		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool)
 {
-	struct vring_desc *desc;
-	uint64_t desc_addr, desc_gaddr;
-	uint32_t desc_avail, desc_offset;
+	uint32_t buf_avail, buf_offset;
+	uint64_t buf_addr, buf_iova, buf_len;
 	uint32_t mbuf_avail, mbuf_offset;
 	uint32_t cpy_len;
-	uint64_t desc_chunck_len;
 	struct rte_mbuf *cur = m, *prev = m;
 	struct virtio_net_hdr tmp_hdr;
 	struct virtio_net_hdr *hdr = NULL;
 	/* A counter to avoid desc dead loop chain */
-	uint32_t nr_desc = 1;
+	uint16_t vec_idx = 0;
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 	int error = 0;
 
-	desc = &descs[desc_idx];
-	if (unlikely((desc->len < dev->vhost_hlen)) ||
-			(desc->flags & VRING_DESC_F_INDIRECT)) {
-		error = -1;
-		goto out;
-	}
+	buf_addr = buf_vec[vec_idx].buf_addr;
+	buf_iova = buf_vec[vec_idx].buf_iova;
+	buf_len = buf_vec[vec_idx].buf_len;
 
-	desc_chunck_len = desc->len;
-	desc_gaddr = desc->addr;
-	desc_addr = vhost_iova_to_vva(dev,
-					vq, desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RO);
-	if (unlikely(!desc_addr)) {
+	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
 		error = -1;
 		goto out;
 	}
 
+	if (likely(nr_vec > 1))
+		rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
 	if (virtio_net_with_host_offload(dev)) {
-		if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) {
-			uint64_t len = desc_chunck_len;
+		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
+			uint64_t len;
 			uint64_t remain = sizeof(struct virtio_net_hdr);
-			uint64_t src = desc_addr;
+			uint64_t src;
 			uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
-			uint64_t guest_addr = desc_gaddr;
+			uint16_t hdr_vec_idx = 0;
 
 			/*
 			 * No luck, the virtio-net header doesn't fit
@@ -800,25 +792,18 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			 */
 			while (remain) {
 				len = remain;
-				src = vhost_iova_to_vva(dev, vq,
-						guest_addr, &len,
-						VHOST_ACCESS_RO);
-				if (unlikely(!src || !len)) {
-					error = -1;
-					goto out;
-				}
-
+				src = buf_vec[hdr_vec_idx].buf_addr;
 				rte_memcpy((void *)(uintptr_t)dst,
 						   (void *)(uintptr_t)src, len);
 
-				guest_addr += len;
 				remain -= len;
 				dst += len;
+				hdr_vec_idx++;
 			}
 
 			hdr = &tmp_hdr;
 		} else {
-			hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
 			rte_prefetch0(hdr);
 		}
 	}
@@ -828,61 +813,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 * for Tx: the first for storing the header, and others
 	 * for storing the data.
 	 */
-	if (likely((desc->len == dev->vhost_hlen) &&
-		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-		desc = &descs[desc->next];
-		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
-			error = -1;
+	if (unlikely(buf_len < dev->vhost_hlen)) {
+		buf_offset = dev->vhost_hlen - buf_len;
+		vec_idx++;
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
+		buf_avail  = buf_len - buf_offset;
+	} else if (buf_len == dev->vhost_hlen) {
+		if (unlikely(++vec_idx >= nr_vec))
 			goto out;
-		}
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
 
-		desc_chunck_len = desc->len;
-		desc_gaddr = desc->addr;
-		desc_addr = vhost_iova_to_vva(dev,
-							vq, desc_gaddr,
-							&desc_chunck_len,
-							VHOST_ACCESS_RO);
-		if (unlikely(!desc_addr)) {
-			error = -1;
-			goto out;
-		}
-
-		desc_offset = 0;
-		desc_avail  = desc->len;
-		nr_desc    += 1;
+		buf_offset = 0;
+		buf_avail = buf_len;
 	} else {
-		desc_avail  = desc->len - dev->vhost_hlen;
-
-		if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += dev->vhost_hlen;
-			desc_addr = vhost_iova_to_vva(dev,
-					vq, desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RO);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-
-			desc_offset = 0;
-		} else {
-			desc_offset = dev->vhost_hlen;
-			desc_chunck_len -= dev->vhost_hlen;
-		}
+		buf_offset = dev->vhost_hlen;
+		buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
 	}
 
-	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+	rte_prefetch0((void *)(uintptr_t)
+			(buf_addr + buf_offset));
 
-	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-			(uint32_t)desc_chunck_len, 0);
+	PRINT_PACKET(dev,
+			(uintptr_t)(buf_addr + buf_offset),
+			(uint32_t)buf_avail, 0);
 
 	mbuf_offset = 0;
 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
 	while (1) {
 		uint64_t hpa;
 
-		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
 
 		/*
 		 * A desc buf might across two host physical pages that are
@@ -890,11 +854,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		 * will be copied even though zero copy is enabled.
 		 */
 		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
-					desc_gaddr + desc_offset, cpy_len)))) {
+					buf_iova + buf_offset, cpy_len)))) {
 			cur->data_len = cpy_len;
 			cur->data_off = 0;
-			cur->buf_addr = (void *)(uintptr_t)(desc_addr
-				+ desc_offset);
+			cur->buf_addr =
+				(void *)(uintptr_t)(buf_addr + buf_offset);
 			cur->buf_iova = hpa;
 
 			/*
@@ -905,20 +869,19 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		} else {
 			if (likely(cpy_len > MAX_BATCH_LEN ||
 				   vq->batch_copy_nb_elems >= vq->size ||
-				   (hdr && cur == m) ||
-				   desc->len != desc_chunck_len)) {
+				   (hdr && cur == m))) {
 				rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
 								   mbuf_offset),
-					   (void *)((uintptr_t)(desc_addr +
-								desc_offset)),
+					   (void *)((uintptr_t)(buf_addr +
+							   buf_offset)),
 					   cpy_len);
 			} else {
 				batch_copy[vq->batch_copy_nb_elems].dst =
 					rte_pktmbuf_mtod_offset(cur, void *,
 								mbuf_offset);
 				batch_copy[vq->batch_copy_nb_elems].src =
-					(void *)((uintptr_t)(desc_addr +
-							     desc_offset));
+					(void *)((uintptr_t)(buf_addr +
+								buf_offset));
 				batch_copy[vq->batch_copy_nb_elems].len =
 					cpy_len;
 				vq->batch_copy_nb_elems++;
@@ -927,59 +890,25 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 		mbuf_avail  -= cpy_len;
 		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_chunck_len -= cpy_len;
-		desc_offset += cpy_len;
+		buf_avail -= cpy_len;
+		buf_offset += cpy_len;
 
-		/* This desc reaches to its end, get the next one */
-		if (desc_avail == 0) {
-			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+		/* This buf reaches to its end, get the next one */
+		if (buf_avail == 0) {
+			if (++vec_idx >= nr_vec)
 				break;
 
-			if (unlikely(desc->next >= max_desc ||
-				     ++nr_desc > max_desc)) {
-				error = -1;
-				goto out;
-			}
-			desc = &descs[desc->next];
-			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
-				error = -1;
-				goto out;
-			}
-
-			desc_chunck_len = desc->len;
-			desc_gaddr = desc->addr;
-			desc_addr = vhost_iova_to_vva(dev,
-							vq, desc_gaddr,
-							&desc_chunck_len,
-							VHOST_ACCESS_RO);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
+			buf_addr = buf_vec[vec_idx].buf_addr;
+			buf_iova = buf_vec[vec_idx].buf_iova;
+			buf_len = buf_vec[vec_idx].buf_len;
 
-			rte_prefetch0((void *)(uintptr_t)desc_addr);
+			rte_prefetch0((void *)(uintptr_t)buf_addr);
 
-			desc_offset = 0;
-			desc_avail  = desc->len;
-
-			PRINT_PACKET(dev, (uintptr_t)desc_addr,
-					(uint32_t)desc_chunck_len, 0);
-		} else if (unlikely(desc_chunck_len == 0)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += desc_offset;
-			desc_addr = vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RO);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-			desc_offset = 0;
+			buf_offset = 0;
+			buf_avail  = buf_len;
 
-			PRINT_PACKET(dev, (uintptr_t)desc_addr,
-					(uint32_t)desc_chunck_len, 0);
+			PRINT_PACKET(dev, (uintptr_t)buf_addr,
+					(uint32_t)buf_avail, 0);
 		}
 
 		/*
@@ -1085,10 +1014,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct virtio_net *dev;
 	struct rte_mbuf *rarp_mbuf = NULL;
 	struct vhost_virtqueue *vq;
-	uint32_t desc_indexes[MAX_PKT_BURST];
 	uint32_t i = 0;
 	uint16_t free_entries;
-	uint16_t avail_idx;
 
 	dev = get_device(vid);
 	if (!dev)
@@ -1186,80 +1113,38 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
-	/* Prefetch available and used ring */
-	avail_idx = vq->last_avail_idx & (vq->size - 1);
-	rte_prefetch0(&vq->avail->ring[avail_idx]);
-
 	count = RTE_MIN(count, MAX_PKT_BURST);
 	count = RTE_MIN(count, free_entries);
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
 			dev->vid, count);
 
-	/* Retrieve all of the head indexes first to avoid caching issues. */
-	for (i = 0; i < count; i++) {
-		avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
-		desc_indexes[i] = vq->avail->ring[avail_idx];
-
-		if (likely(dev->dequeue_zero_copy == 0))
-			update_shadow_used_ring(vq, desc_indexes[i], 0);
-	}
-
-	/* Prefetch descriptor index. */
-	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
-		struct vring_desc *desc, *idesc = NULL;
-		uint16_t sz, idx;
-		uint64_t dlen;
+		struct buf_vector buf_vec[BUF_VECTOR_MAX];
+		uint16_t head_idx, dummy_len;
+		uint32_t nr_vec = 0;
 		int err;
 
-		if (likely(i + 1 < count))
-			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-
-		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
-			dlen = vq->desc[desc_indexes[i]].len;
-			desc = (struct vring_desc *)(uintptr_t)
-				vhost_iova_to_vva(dev, vq,
-						vq->desc[desc_indexes[i]].addr,
-						&dlen,
-						VHOST_ACCESS_RO);
-			if (unlikely(!desc))
-				break;
-
-			if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) {
-				/*
-				 * The indirect desc table is not contiguous
-				 * in process VA space, we have to copy it.
-				 */
-				idesc = alloc_copy_ind_table(dev, vq,
-						&vq->desc[desc_indexes[i]]);
-				if (unlikely(!idesc))
-					break;
-
-				desc = idesc;
-			}
+		if (unlikely(fill_vec_buf(dev, vq,
+						vq->last_avail_idx + i,
+						&nr_vec, buf_vec,
+						&head_idx, &dummy_len,
+						VHOST_ACCESS_RO) < 0))
+			break;
 
-			rte_prefetch0(desc);
-			sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
-			idx = 0;
-		} else {
-			desc = vq->desc;
-			sz = vq->size;
-			idx = desc_indexes[i];
-		}
+		if (likely(dev->dequeue_zero_copy == 0))
+			update_shadow_used_ring(vq, head_idx, 0);
 
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			free_ind_table(idesc);
 			break;
 		}
 
-		err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
-					mbuf_pool);
+		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+				mbuf_pool);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
-			free_ind_table(idesc);
 			break;
 		}
 
@@ -1269,11 +1154,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			zmbuf = get_zmbuf(vq);
 			if (!zmbuf) {
 				rte_pktmbuf_free(pkts[i]);
-				free_ind_table(idesc);
 				break;
 			}
 			zmbuf->mbuf = pkts[i];
-			zmbuf->desc_idx = desc_indexes[i];
+			zmbuf->desc_idx = head_idx;
 
 			/*
 			 * Pin lock the mbuf; we will check later to see
@@ -1286,9 +1170,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			vq->nr_zmbuf += 1;
 			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
 		}
-
-		if (unlikely(!!idesc))
-			free_ind_table(idesc);
 	}
 	vq->last_avail_idx += i;
 
-- 
2.14.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH v4 3/5] vhost: improve prefetching in dequeue path
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors " Maxime Coquelin
@ 2018-07-06  7:04 ` Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 4/5] vhost: prefetch first descriptor " Maxime Coquelin
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:04 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev; +Cc: Maxime Coquelin

This is an optimization to prefetch the next buffer while the
current one is being processed.
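
In short, instead of unconditionally prefetching the buffer that is
about to be used, the loop now prefetches the following one while the
current buffer is being copied (sketch of the pattern; the actual hunk
is below):

    if (vec_idx + 1 < nr_vec)
            rte_prefetch0((void *)(uintptr_t)
                            buf_vec[vec_idx + 1].buf_addr);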

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 6339296c7..2cfd8585c 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -902,7 +902,13 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			buf_iova = buf_vec[vec_idx].buf_iova;
 			buf_len = buf_vec[vec_idx].buf_len;
 
-			rte_prefetch0((void *)(uintptr_t)buf_addr);
+			/*
+			 * Prefecth desc n + 1 buffer while
+			 * desc n buffer is processed.
+			 */
+			if (vec_idx + 1 < nr_vec)
+				rte_prefetch0((void *)(uintptr_t)
+						buf_vec[vec_idx + 1].buf_addr);
 
 			buf_offset = 0;
 			buf_avail  = buf_len;
@@ -1134,6 +1140,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		if (likely(dev->dequeue_zero_copy == 0))
 			update_shadow_used_ring(vq, head_idx, 0);
 
+		rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
-- 
2.14.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH v4 4/5] vhost: prefetch first descriptor in dequeue path
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
                   ` (2 preceding siblings ...)
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 3/5] vhost: improve prefetching " Maxime Coquelin
@ 2018-07-06  7:04 ` Maxime Coquelin
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 5/5] vhost: improve prefetching in enqueue path Maxime Coquelin
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:04 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev; +Cc: Maxime Coquelin

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2cfd8585c..2662a1d32 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1083,6 +1083,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		vq->shadow_used_idx = 0;
 	}
 
+	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+
 	/*
 	 * Construct a RARP broadcast packet, and inject it to the "pkts"
 	 * array, to looks like that guest actually send such packet.
-- 
2.14.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH v4 5/5] vhost: improve prefetching in enqueue path
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
                   ` (3 preceding siblings ...)
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 4/5] vhost: prefetch first descriptor " Maxime Coquelin
@ 2018-07-06  7:04 ` Maxime Coquelin
  2018-07-09  1:39 ` [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Tiwei Bie
  2018-07-09  7:01 ` Tiwei Bie
  6 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:04 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev; +Cc: Maxime Coquelin

This is an optimization to prefetch the next buffer while the
current one is being processed.
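
Same idea as on the dequeue side: prefetch buf_vec[vec_idx + 1] while
buf_vec[vec_idx] is being filled, and prefetch the second vector entry
up front instead of the header address (sketch; the actual hunks are
below):

    if (nr_vec > 1)
            rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);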

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2662a1d32..82d5d9e17 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -393,6 +393,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	buf_iova = buf_vec[vec_idx].buf_iova;
 	buf_len = buf_vec[vec_idx].buf_len;
 
+	if (nr_vec > 1)
+		rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
 	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
 		error = -1;
 		goto out;
@@ -404,7 +407,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		hdr = &tmp_hdr;
 	else
 		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
-	rte_prefetch0((void *)(uintptr_t)hdr_addr);
 
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
 		dev->vid, num_buffers);
@@ -436,8 +438,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			buf_iova = buf_vec[vec_idx].buf_iova;
 			buf_len = buf_vec[vec_idx].buf_len;
 
-			/* Prefetch buffer address. */
-			rte_prefetch0((void *)(uintptr_t)buf_addr);
+			/* Prefetch next buffer address. */
+			if (vec_idx + 1 < nr_vec)
+				rte_prefetch0((void *)(uintptr_t)
+						buf_vec[vec_idx + 1].buf_addr);
 			buf_offset = 0;
 			buf_avail  = buf_len;
 		}
@@ -579,6 +583,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			break;
 		}
 
+		rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
 		VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
 			dev->vid, vq->last_avail_idx,
 			vq->last_avail_idx + num_buffers);
-- 
2.14.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path Maxime Coquelin
@ 2018-07-06  7:59   ` Maxime Coquelin
  0 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2018-07-06  7:59 UTC (permalink / raw)
  To: tiwei.bie, zhihong.wang, dev

Hi Tiwei,

On 07/06/2018 09:04 AM, Maxime Coquelin wrote:
> Relax used ring contention by reusing the shadow used
> ring feature already used by the enqueue path.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Just noticed I forgot to apply your:
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>

Regards,
Maxime
> ---
>   lib/librte_vhost/virtio_net.c | 50 +++++++++----------------------------------
>   1 file changed, 10 insertions(+), 40 deletions(-)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
                   ` (4 preceding siblings ...)
  2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 5/5] vhost: improve prefetching in enqueue path Maxime Coquelin
@ 2018-07-09  1:39 ` Tiwei Bie
  2018-07-09  7:01 ` Tiwei Bie
  6 siblings, 0 replies; 10+ messages in thread
From: Tiwei Bie @ 2018-07-09  1:39 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: zhihong.wang, dev

On Fri, Jul 06, 2018 at 09:04:44AM +0200, Maxime Coquelin wrote:
> This series is again preliminary work to ease packed ring
> layout integration.
> 
> The main changes are using buffer vectors also in the dequeue
> path, and performing the IOVA to HVA translation at vector fill
> time.
> 
> I still have to run more benchmarks, but PVP benchmarks do
> not show any performance change.
> 
> A good thing is that it saves a further ~140 lines of code.
> 
> Changes since v3:
> =================
> - Fix dequeue_zero_copy last_used_idx update (Tiwei)
> - Remove "vhost: make gpa to hpa failure an error" patch (Tiwei)
> 
> Changes since v2:
> =================
>  - check vec_id doesn't overflow (Tiwei)
>  - Fix perm parameters passed to fill_vec_buf (Tiwei)
>  - Remove extra space in variable assignment (Tiwei)
> 
> 
> Maxime Coquelin (5):
>   vhost: use shadow used ring in dequeue path
>   vhost: use buffer vectors in dequeue path
>   vhost: improve prefetching in dequeue path
>   vhost: prefetch first descriptor in dequeue path
>   vhost: improve prefetching in enqueue path
> 
>  lib/librte_vhost/vhost.h      |   1 +
>  lib/librte_vhost/virtio_net.c | 517 ++++++++++++++++--------------------------
>  2 files changed, 193 insertions(+), 325 deletions(-)
> 
> -- 
> 2.14.4
> 

For the series:
Acked-by: Tiwei Bie <tiwei.bie@intel.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors
  2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
                   ` (5 preceding siblings ...)
  2018-07-09  1:39 ` [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Tiwei Bie
@ 2018-07-09  7:01 ` Tiwei Bie
  6 siblings, 0 replies; 10+ messages in thread
From: Tiwei Bie @ 2018-07-09  7:01 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: zhihong.wang, dev

On Fri, Jul 06, 2018 at 09:04:44AM +0200, Maxime Coquelin wrote:
> This series is again preliminary work to ease packed ring
> layout integration.
> 
> The main changes are using buffer vectors also in the dequeue
> path, and performing the IOVA to HVA translation at vector fill
> time.
> 
> I still have to run more benchmarks, but PVP benchmarks do
> not show any performance change.
> 
> A good thing is that it saves a further ~140 lines of code.
> 
> Changes since v3:
> =================
> - Fix dequeue_zero_copy last_used_idx update (Tiwei)
> - Remove "vhost: make gpa to hpa failure an error" patch (Tiwei)
> 
> Changes since v2:
> =================
>  - check vec_id doesn't overflow (Tiwei)
>  - Fix perm parameters passed to fill_vec_buf (Tiwei)
>  - Remove extra space in variable assignment (Tiwei)
> 
> 
> Maxime Coquelin (5):
>   vhost: use shadow used ring in dequeue path
>   vhost: use buffer vectors in dequeue path
>   vhost: improve prefetching in dequeue path
>   vhost: prefetch first descriptor in dequeue path
>   vhost: improve prefetching in enqueue path
> 
>  lib/librte_vhost/vhost.h      |   1 +
>  lib/librte_vhost/virtio_net.c | 517 ++++++++++++++++--------------------------
>  2 files changed, 193 insertions(+), 325 deletions(-)
> 
> -- 
> 2.14.4
> 

Applied to dpdk-next-virtio/master, thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors in dequeue path
       [not found]     ` <E0CBA5A1980F1F408E1F28F9991B5B1D50D03E40@SHSMSX104.ccr.corp.intel.com>
@ 2018-07-17  7:17       ` Wang, Yinan
  0 siblings, 0 replies; 10+ messages in thread
From: Wang, Yinan @ 2018-07-17  7:17 UTC (permalink / raw)
  To: 'maxime.coquelin@redhat.com', jfreimann, dev
  Cc: Yao, Lei A, Bie, Tiwei, Wang, Zhihong


Hi Maxime,

The vhost-user + virtio-net VM2VM TSO performance test works well on DPDK v18.05.
But during our performance test with v18.08-rc1, we found a regression in the VM2VM test case: when using iperf or netperf, the server VM hangs or crashes. After bisection, I found it is caused by your patch below.
Could you help take a look?

Below are the steps to reproduce:

1. Bind the 82599 NIC port to igb_uio
2. Launch vhost-switch:
./examples/vhost/build/vhost-switch -c 0x70000000 -n 4 --socket-mem 2048,2048 --legacy-mem -- -p 0x1 --mergeable 1 --vm2vm 1  --tso 1 --tx-csum 1  --socket-file ./vhost-net --socket-file ./vhost-net1
3. Launch VM1 and VM2:
      taskset -c 31 \
      qemu-system-x86_64  -name vm0 -enable-kvm \
      -chardev socket,path=/tmp/vm0_qga0.sock,server,nowait,id=vm0_qga0 \
      -device virtio-serial -device virtserialport,chardev=vm0_qga0,name=org.qemu.guest_agent.0 -daemonize \
      -monitor unix:/tmp/vm0_monitor.sock,server,nowait -net nic,vlan=0,macaddr=00:00:00:50:fb:f3,addr=1f -net user,vlan=0,hostfwd=tcp:127.0.0.1:6145-:22 \
      -chardev socket,id=char0,path=./vhost-net \
      -netdev type=vhost-user,id=netdev0,chardev=char0,vhostforce \
      -device virtio-net-pci,netdev=netdev0,mac=52:54:00:00:00:01 \
      -cpu host -smp 1 -m 4096 -object memory-backend-file,id=mem,size=4096M,mem-path=/mnt/huge,share=on -numa node,memdev=mem -mem-prealloc \
      -drive file=/home/osimg/ubuntu16.img -vnc :4

     taskset -c 32 \
     qemu-system-x86_64  -name vm1 -enable-kvm \
     -chardev socket,path=/tmp/vm1_qga0.sock,server,nowait,id=vm1_qga0 \
     -device virtio-serial -device virtserialport,chardev=vm1_qga0,name=org.qemu.guest_agent.0 -daemonize \
     -monitor unix:/tmp/vm1_monitor.sock,server,nowait -net nic,vlan=0,macaddr=00:00:00:40:75:e7,addr=1f -net user,vlan=0,hostfwd=tcp:127.0.0.1:6134-:22 \
     -chardev socket,id=char0,path=./vhost-net1 \
     -netdev type=vhost-user,id=netdev0,chardev=char0,vhostforce \
     -device virtio-net-pci,netdev=netdev0,mac=52:54:00:00:00:02 -cpu host -smp 1 -m 4096 \
     -object memory-backend-file,id=mem,size=4096M,mem-path=/mnt/huge,share=on -numa node,memdev=mem -mem-prealloc -drive file=/home/osimg/ubuntu16-2.img -vnc :5

4. On VM1, set the virtio IP and run iperf
    ifconfig ens4 1.1.1.2
    arp -s 1.1.1.8 52:54:00:00:00:02
    arp # to check the arp table is complete and correct. 

5. On VM2, set the virtio IP and run iperf
    ifconfig ens4 1.1.1.8
    arp -s 1.1.1.2 52:54:00:00:00:01
    arp # to check the arp table is complete and correct. 
 
6. Ensure virtio1 can ping virtio2, then in VM1 run: `iperf -s -i 1`; in VM2, run `iperf -c 1.1.1.2 -i 1 -t 60`.

7. Check the iperf performance for VM2VM case.

Best Wishes,
Yinan

-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Maxime Coquelin
Sent: Friday, July 6, 2018 8:05 AM
To: Bie, Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>; dev@dpdk.org
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors in dequeue path

To ease packed ring layout integration, this patch makes the dequeue path re-use the buffer vectors implemented for the enqueue path.

With this change, copy_desc_to_mbuf() becomes agnostic to the ring layout type.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.h      |   1 +
 lib/librte_vhost/virtio_net.c | 451 ++++++++++++++++--------------------------
 2 files changed, 167 insertions(+), 285 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 3437b996b..79e3117d2 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -43,6 +43,7 @@
  * from vring to do scatter RX.
  */
 struct buf_vector {
+	uint64_t buf_iova;
 	uint64_t buf_addr;
 	uint32_t buf_len;
 	uint32_t desc_idx;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 741267345..6339296c7 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -225,12 +225,12 @@ static __rte_always_inline int  fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			 uint32_t avail_idx, uint32_t *vec_idx,
 			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
-			 uint16_t *desc_chain_len)
+			 uint16_t *desc_chain_len, uint8_t perm)
 {
 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
 	uint32_t vec_id = *vec_idx;
 	uint32_t len    = 0;
-	uint64_t dlen;
+	uint64_t dlen, desc_avail, desc_iova;
 	struct vring_desc *descs = vq->desc;
 	struct vring_desc *idesc = NULL;
 
@@ -261,16 +261,43 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	}
 
 	while (1) {
-		if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+		if (unlikely(idx >= vq->size)) {
 			free_ind_table(idesc);
 			return -1;
 		}
 
+
 		len += descs[idx].len;
-		buf_vec[vec_id].buf_addr = descs[idx].addr;
-		buf_vec[vec_id].buf_len  = descs[idx].len;
-		buf_vec[vec_id].desc_idx = idx;
-		vec_id++;
+		desc_avail = descs[idx].len;
+		desc_iova = descs[idx].addr;
+
+		while (desc_avail) {
+			uint64_t desc_addr;
+			uint64_t desc_chunck_len = desc_avail;
+
+			if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+				free_ind_table(idesc);
+				return -1;
+			}
+
+			desc_addr = vhost_iova_to_vva(dev, vq,
+					desc_iova,
+					&desc_chunck_len,
+					perm);
+			if (unlikely(!desc_addr)) {
+				free_ind_table(idesc);
+				return -1;
+			}
+
+			buf_vec[vec_id].buf_iova = desc_iova;
+			buf_vec[vec_id].buf_addr = desc_addr;
+			buf_vec[vec_id].buf_len  = desc_chunck_len;
+			buf_vec[vec_id].desc_idx = idx;
+
+			desc_avail -= desc_chunck_len;
+			desc_iova += desc_chunck_len;
+			vec_id++;
+		}
 
 		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
 			break;
@@ -293,7 +320,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,  static inline int  reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint32_t size, struct buf_vector *buf_vec,
-				uint16_t *num_buffers, uint16_t avail_head)
+				uint16_t *num_buffers, uint16_t avail_head,
+				uint16_t *nr_vec)
 {
 	uint16_t cur_idx;
 	uint32_t vec_idx = 0;
@@ -315,7 +343,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			return -1;
 
 		if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
-						&head_idx, &len) < 0))
+						&head_idx, &len,
+						VHOST_ACCESS_RW) < 0))
 			return -1;
 		len = RTE_MIN(len, size);
 		update_shadow_used_ring(vq, head_idx, len); @@ -334,21 +363,22 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			return -1;
 	}
 
+	*nr_vec = vec_idx;
+
 	return 0;
 }
 
 static __rte_always_inline int
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			    struct rte_mbuf *m, struct buf_vector *buf_vec,
-			    uint16_t num_buffers)
+			    uint16_t nr_vec, uint16_t num_buffers)
 {
 	uint32_t vec_idx = 0;
-	uint64_t desc_addr, desc_gaddr;
 	uint32_t mbuf_offset, mbuf_avail;
-	uint32_t desc_offset, desc_avail;
+	uint32_t buf_offset, buf_avail;
+	uint64_t buf_addr, buf_iova, buf_len;
 	uint32_t cpy_len;
-	uint64_t desc_chunck_len;
-	uint64_t hdr_addr, hdr_phys_addr;
+	uint64_t hdr_addr;
 	struct rte_mbuf *hdr_mbuf;
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; @@ -359,82 +389,57 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		goto out;
 	}
 
-	desc_chunck_len = buf_vec[vec_idx].buf_len;
-	desc_gaddr = buf_vec[vec_idx].buf_addr;
-	desc_addr = vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RW);
-	if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+	buf_addr = buf_vec[vec_idx].buf_addr;
+	buf_iova = buf_vec[vec_idx].buf_iova;
+	buf_len = buf_vec[vec_idx].buf_len;
+
+	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
 		error = -1;
 		goto out;
 	}
 
 	hdr_mbuf = m;
-	hdr_addr = desc_addr;
-	if (unlikely(desc_chunck_len < dev->vhost_hlen))
+	hdr_addr = buf_addr;
+	if (unlikely(buf_len < dev->vhost_hlen))
 		hdr = &tmp_hdr;
 	else
 		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
-	hdr_phys_addr = desc_gaddr;
 	rte_prefetch0((void *)(uintptr_t)hdr_addr);
 
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
 		dev->vid, num_buffers);
 
-	desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
-	if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
-		desc_chunck_len = desc_avail;
-		desc_gaddr += dev->vhost_hlen;
-		desc_addr = vhost_iova_to_vva(dev, vq,
-				desc_gaddr,
-				&desc_chunck_len,
-				VHOST_ACCESS_RW);
-		if (unlikely(!desc_addr)) {
-			error = -1;
-			goto out;
-		}
-
-		desc_offset = 0;
+	if (unlikely(buf_len < dev->vhost_hlen)) {
+		buf_offset = dev->vhost_hlen - buf_len;
+		vec_idx++;
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
+		buf_avail = buf_len - buf_offset;
 	} else {
-		desc_offset = dev->vhost_hlen;
-		desc_chunck_len -= dev->vhost_hlen;
+		buf_offset = dev->vhost_hlen;
+		buf_avail = buf_len - dev->vhost_hlen;
 	}
 
-
 	mbuf_avail  = rte_pktmbuf_data_len(m);
 	mbuf_offset = 0;
 	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current desc buf, get the next one */
-		if (desc_avail == 0) {
+		/* done with current buf, get the next one */
+		if (buf_avail == 0) {
 			vec_idx++;
-			desc_chunck_len = buf_vec[vec_idx].buf_len;
-			desc_gaddr = buf_vec[vec_idx].buf_addr;
-			desc_addr =
-				vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RW);
-			if (unlikely(!desc_addr)) {
+			if (unlikely(vec_idx >= nr_vec)) {
 				error = -1;
 				goto out;
 			}
 
+			buf_addr = buf_vec[vec_idx].buf_addr;
+			buf_iova = buf_vec[vec_idx].buf_iova;
+			buf_len = buf_vec[vec_idx].buf_len;
+
 			/* Prefetch buffer address. */
-			rte_prefetch0((void *)(uintptr_t)desc_addr);
-			desc_offset = 0;
-			desc_avail  = buf_vec[vec_idx].buf_len;
-		} else if (unlikely(desc_chunck_len == 0)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += desc_offset;
-			desc_addr = vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len, VHOST_ACCESS_RW);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-			desc_offset = 0;
+			rte_prefetch0((void *)(uintptr_t)buf_addr);
+			buf_offset = 0;
+			buf_avail  = buf_len;
 		}
 
 		/* done with current mbuf, get the next one */ @@ -455,18 +460,12 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint64_t len;
 				uint64_t remain = dev->vhost_hlen;
 				uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
-				uint64_t guest_addr = hdr_phys_addr;
+				uint64_t iova = buf_vec[0].buf_iova;
+				uint16_t hdr_vec_idx = 0;
 
 				while (remain) {
 					len = remain;
-					dst = vhost_iova_to_vva(dev, vq,
-							guest_addr, &len,
-							VHOST_ACCESS_RW);
-					if (unlikely(!dst || !len)) {
-						error = -1;
-						goto out;
-					}
-
+					dst = buf_vec[hdr_vec_idx].buf_addr;
 					rte_memcpy((void *)(uintptr_t)dst,
 							(void *)(uintptr_t)src,
 							len);
@@ -474,50 +473,50 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 					PRINT_PACKET(dev, (uintptr_t)dst,
 							(uint32_t)len, 0);
 					vhost_log_cache_write(dev, vq,
-							guest_addr, len);
+							iova, len);
 
 					remain -= len;
-					guest_addr += len;
+					iova += len;
 					src += len;
+					hdr_vec_idx++;
 				}
 			} else {
 				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
 						dev->vhost_hlen, 0);
-				vhost_log_cache_write(dev, vq, hdr_phys_addr,
+				vhost_log_cache_write(dev, vq,
+						buf_vec[0].buf_iova,
 						dev->vhost_hlen);
 			}
 
 			hdr_addr = 0;
 		}
 
-		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+		cpy_len = RTE_MIN(buf_len, mbuf_avail);
 
 		if (likely(cpy_len > MAX_BATCH_LEN ||
 					vq->batch_copy_nb_elems >= vq->size)) {
-			rte_memcpy((void *)((uintptr_t)(desc_addr +
-							desc_offset)),
+			rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 				cpy_len);
-			vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+			vhost_log_cache_write(dev, vq, buf_iova + buf_offset,
 					cpy_len);
-			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+			PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
 				cpy_len, 0);
 		} else {
 			batch_copy[vq->batch_copy_nb_elems].dst =
-				(void *)((uintptr_t)(desc_addr + desc_offset));
+				(void *)((uintptr_t)(buf_addr + buf_offset));
 			batch_copy[vq->batch_copy_nb_elems].src =
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
 			batch_copy[vq->batch_copy_nb_elems].log_addr =
-				desc_gaddr + desc_offset;
+				buf_iova + buf_offset;
 			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
 			vq->batch_copy_nb_elems++;
 		}
 
 		mbuf_avail  -= cpy_len;
 		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_offset += cpy_len;
-		desc_chunck_len -= cpy_len;
+		buf_avail  -= cpy_len;
+		buf_offset += cpy_len;
 	}
 
 out:
@@ -568,10 +567,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	avail_head = *((volatile uint16_t *)&vq->avail->idx);
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+		uint16_t nr_vec = 0;
 
 		if (unlikely(reserve_avail_buf(dev, vq,
 						pkt_len, buf_vec, &num_buffers,
-						avail_head) < 0)) {
+						avail_head, &nr_vec) < 0)) {
 			VHOST_LOG_DEBUG(VHOST_DATA,
 				"(%d) failed to get enough desc from vring\n",
 				dev->vid);
@@ -584,7 +584,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			vq->last_avail_idx + num_buffers);
 
 		if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
-						buf_vec, num_buffers) < 0) {
+						buf_vec, nr_vec,
+						num_buffers) < 0) {
 			vq->shadow_used_idx -= num_buffers;
 			break;
 		}
@@ -750,49 +751,40 @@ put_zmbuf(struct zcopy_mbuf *zmbuf)
 
 static __rte_always_inline int
 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct vring_desc *descs, uint16_t max_desc,
-		  struct rte_mbuf *m, uint16_t desc_idx,
-		  struct rte_mempool *mbuf_pool)
+		  struct buf_vector *buf_vec, uint16_t nr_vec,
+		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool)
 {
-	struct vring_desc *desc;
-	uint64_t desc_addr, desc_gaddr;
-	uint32_t desc_avail, desc_offset;
+	uint32_t buf_avail, buf_offset;
+	uint64_t buf_addr, buf_iova, buf_len;
 	uint32_t mbuf_avail, mbuf_offset;
 	uint32_t cpy_len;
-	uint64_t desc_chunck_len;
 	struct rte_mbuf *cur = m, *prev = m;
 	struct virtio_net_hdr tmp_hdr;
 	struct virtio_net_hdr *hdr = NULL;
 	/* A counter to avoid desc dead loop chain */
-	uint32_t nr_desc = 1;
+	uint16_t vec_idx = 0;
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 	int error = 0;
 
-	desc = &descs[desc_idx];
-	if (unlikely((desc->len < dev->vhost_hlen)) ||
-			(desc->flags & VRING_DESC_F_INDIRECT)) {
-		error = -1;
-		goto out;
-	}
+	buf_addr = buf_vec[vec_idx].buf_addr;
+	buf_iova = buf_vec[vec_idx].buf_iova;
+	buf_len = buf_vec[vec_idx].buf_len;
 
-	desc_chunck_len = desc->len;
-	desc_gaddr = desc->addr;
-	desc_addr = vhost_iova_to_vva(dev,
-					vq, desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RO);
-	if (unlikely(!desc_addr)) {
+	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
 		error = -1;
 		goto out;
 	}
 
+	if (likely(nr_vec > 1))
+		rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
 	if (virtio_net_with_host_offload(dev)) {
-		if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) {
-			uint64_t len = desc_chunck_len;
+		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
+			uint64_t len;
 			uint64_t remain = sizeof(struct virtio_net_hdr);
-			uint64_t src = desc_addr;
+			uint64_t src;
 			uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
-			uint64_t guest_addr = desc_gaddr;
+			uint16_t hdr_vec_idx = 0;
 
 			/*
 			 * No luck, the virtio-net header doesn't fit @@ -800,25 +792,18 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			 */
 			while (remain) {
 				len = remain;
-				src = vhost_iova_to_vva(dev, vq,
-						guest_addr, &len,
-						VHOST_ACCESS_RO);
-				if (unlikely(!src || !len)) {
-					error = -1;
-					goto out;
-				}
-
+				src = buf_vec[hdr_vec_idx].buf_addr;
 				rte_memcpy((void *)(uintptr_t)dst,
 						   (void *)(uintptr_t)src, len);
 
-				guest_addr += len;
 				remain -= len;
 				dst += len;
+				hdr_vec_idx++;
 			}
 
 			hdr = &tmp_hdr;
 		} else {
-			hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
 			rte_prefetch0(hdr);
 		}
 	}
@@ -828,61 +813,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 * for Tx: the first for storing the header, and others
 	 * for storing the data.
 	 */
-	if (likely((desc->len == dev->vhost_hlen) &&
-		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-		desc = &descs[desc->next];
-		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
-			error = -1;
+	if (unlikely(buf_len < dev->vhost_hlen)) {
+		buf_offset = dev->vhost_hlen - buf_len;
+		vec_idx++;
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
+		buf_avail  = buf_len - buf_offset;
+	} else if (buf_len == dev->vhost_hlen) {
+		if (unlikely(++vec_idx >= nr_vec))
 			goto out;
-		}
+		buf_addr = buf_vec[vec_idx].buf_addr;
+		buf_iova = buf_vec[vec_idx].buf_iova;
+		buf_len = buf_vec[vec_idx].buf_len;
 
-		desc_chunck_len = desc->len;
-		desc_gaddr = desc->addr;
-		desc_addr = vhost_iova_to_vva(dev,
-							vq, desc_gaddr,
-							&desc_chunck_len,
-							VHOST_ACCESS_RO);
-		if (unlikely(!desc_addr)) {
-			error = -1;
-			goto out;
-		}
-
-		desc_offset = 0;
-		desc_avail  = desc->len;
-		nr_desc    += 1;
+		buf_offset = 0;
+		buf_avail = buf_len;
 	} else {
-		desc_avail  = desc->len - dev->vhost_hlen;
-
-		if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += dev->vhost_hlen;
-			desc_addr = vhost_iova_to_vva(dev,
-					vq, desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RO);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-
-			desc_offset = 0;
-		} else {
-			desc_offset = dev->vhost_hlen;
-			desc_chunck_len -= dev->vhost_hlen;
-		}
+		buf_offset = dev->vhost_hlen;
+		buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
 	}
 
-	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+	rte_prefetch0((void *)(uintptr_t)
+			(buf_addr + buf_offset));
 
-	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-			(uint32_t)desc_chunck_len, 0);
+	PRINT_PACKET(dev,
+			(uintptr_t)(buf_addr + buf_offset),
+			(uint32_t)buf_avail, 0);
 
 	mbuf_offset = 0;
 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
 	while (1) {
 		uint64_t hpa;
 
-		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
 
 		/*
 		 * A desc buf might across two host physical pages that are
@@ -890,11 +854,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		 * will be copied even though zero copy is enabled.
 		 */
 		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
-					desc_gaddr + desc_offset, cpy_len)))) {
+					buf_iova + buf_offset, cpy_len)))) {
 			cur->data_len = cpy_len;
 			cur->data_off = 0;
-			cur->buf_addr = (void *)(uintptr_t)(desc_addr
-				+ desc_offset);
+			cur->buf_addr =
+				(void *)(uintptr_t)(buf_addr + buf_offset);
 			cur->buf_iova = hpa;
 
 			/*
@@ -905,20 +869,19 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		} else {
 			if (likely(cpy_len > MAX_BATCH_LEN ||
 				   vq->batch_copy_nb_elems >= vq->size ||
-				   (hdr && cur == m) ||
-				   desc->len != desc_chunck_len)) {
+				   (hdr && cur == m))) {
 				rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
 								   mbuf_offset),
-					   (void *)((uintptr_t)(desc_addr +
-								desc_offset)),
+					   (void *)((uintptr_t)(buf_addr +
+							   buf_offset)),
 					   cpy_len);
 			} else {
 				batch_copy[vq->batch_copy_nb_elems].dst =
 					rte_pktmbuf_mtod_offset(cur, void *,
 								mbuf_offset);
 				batch_copy[vq->batch_copy_nb_elems].src =
-					(void *)((uintptr_t)(desc_addr +
-							     desc_offset));
+					(void *)((uintptr_t)(buf_addr +
+								buf_offset));
 				batch_copy[vq->batch_copy_nb_elems].len =
 					cpy_len;
 				vq->batch_copy_nb_elems++;
@@ -927,59 +890,25 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 		mbuf_avail  -= cpy_len;
 		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_chunck_len -= cpy_len;
-		desc_offset += cpy_len;
+		buf_avail -= cpy_len;
+		buf_offset += cpy_len;
 
-		/* This desc reaches to its end, get the next one */
-		if (desc_avail == 0) {
-			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+		/* This buf reaches to its end, get the next one */
+		if (buf_avail == 0) {
+			if (++vec_idx >= nr_vec)
 				break;
 
-			if (unlikely(desc->next >= max_desc ||
-				     ++nr_desc > max_desc)) {
-				error = -1;
-				goto out;
-			}
-			desc = &descs[desc->next];
-			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
-				error = -1;
-				goto out;
-			}
-
-			desc_chunck_len = desc->len;
-			desc_gaddr = desc->addr;
-			desc_addr = vhost_iova_to_vva(dev,
-							vq, desc_gaddr,
-							&desc_chunck_len,
-							VHOST_ACCESS_RO);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
+			buf_addr = buf_vec[vec_idx].buf_addr;
+			buf_iova = buf_vec[vec_idx].buf_iova;
+			buf_len = buf_vec[vec_idx].buf_len;
 
-			rte_prefetch0((void *)(uintptr_t)desc_addr);
+			rte_prefetch0((void *)(uintptr_t)buf_addr);
 
-			desc_offset = 0;
-			desc_avail  = desc->len;
-
-			PRINT_PACKET(dev, (uintptr_t)desc_addr,
-					(uint32_t)desc_chunck_len, 0);
-		} else if (unlikely(desc_chunck_len == 0)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += desc_offset;
-			desc_addr = vhost_iova_to_vva(dev, vq,
-					desc_gaddr,
-					&desc_chunck_len,
-					VHOST_ACCESS_RO);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-			desc_offset = 0;
+			buf_offset = 0;
+			buf_avail  = buf_len;
 
-			PRINT_PACKET(dev, (uintptr_t)desc_addr,
-					(uint32_t)desc_chunck_len, 0);
+			PRINT_PACKET(dev, (uintptr_t)buf_addr,
+					(uint32_t)buf_avail, 0);
 		}
 
 		/*
@@ -1085,10 +1014,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct virtio_net *dev;
 	struct rte_mbuf *rarp_mbuf = NULL;
 	struct vhost_virtqueue *vq;
-	uint32_t desc_indexes[MAX_PKT_BURST];
 	uint32_t i = 0;
 	uint16_t free_entries;
-	uint16_t avail_idx;
 
 	dev = get_device(vid);
 	if (!dev)
@@ -1186,80 +1113,38 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
-	/* Prefetch available and used ring */
-	avail_idx = vq->last_avail_idx & (vq->size - 1);
-	rte_prefetch0(&vq->avail->ring[avail_idx]);
-
 	count = RTE_MIN(count, MAX_PKT_BURST);
 	count = RTE_MIN(count, free_entries);
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
 			dev->vid, count);
 
-	/* Retrieve all of the head indexes first to avoid caching issues. */
-	for (i = 0; i < count; i++) {
-		avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
-		desc_indexes[i] = vq->avail->ring[avail_idx];
-
-		if (likely(dev->dequeue_zero_copy == 0))
-			update_shadow_used_ring(vq, desc_indexes[i], 0);
-	}
-
-	/* Prefetch descriptor index. */
-	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
-		struct vring_desc *desc, *idesc = NULL;
-		uint16_t sz, idx;
-		uint64_t dlen;
+		struct buf_vector buf_vec[BUF_VECTOR_MAX];
+		uint16_t head_idx, dummy_len;
+		uint32_t nr_vec = 0;
 		int err;
 
-		if (likely(i + 1 < count))
-			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-
-		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
-			dlen = vq->desc[desc_indexes[i]].len;
-			desc = (struct vring_desc *)(uintptr_t)
-				vhost_iova_to_vva(dev, vq,
-						vq->desc[desc_indexes[i]].addr,
-						&dlen,
-						VHOST_ACCESS_RO);
-			if (unlikely(!desc))
-				break;
-
-			if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) {
-				/*
-				 * The indirect desc table is not contiguous
-				 * in process VA space, we have to copy it.
-				 */
-				idesc = alloc_copy_ind_table(dev, vq,
-						&vq->desc[desc_indexes[i]]);
-				if (unlikely(!idesc))
-					break;
-
-				desc = idesc;
-			}
+		if (unlikely(fill_vec_buf(dev, vq,
+						vq->last_avail_idx + i,
+						&nr_vec, buf_vec,
+						&head_idx, &dummy_len,
+						VHOST_ACCESS_RO) < 0))
+			break;
 
-			rte_prefetch0(desc);
-			sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
-			idx = 0;
-		} else {
-			desc = vq->desc;
-			sz = vq->size;
-			idx = desc_indexes[i];
-		}
+		if (likely(dev->dequeue_zero_copy == 0))
+			update_shadow_used_ring(vq, head_idx, 0);
 
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			free_ind_table(idesc);
 			break;
 		}
 
-		err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
-					mbuf_pool);
+		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+				mbuf_pool);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
-			free_ind_table(idesc);
 			break;
 		}
 
@@ -1269,11 +1154,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			zmbuf = get_zmbuf(vq);
 			if (!zmbuf) {
 				rte_pktmbuf_free(pkts[i]);
-				free_ind_table(idesc);
 				break;
 			}
 			zmbuf->mbuf = pkts[i];
-			zmbuf->desc_idx = desc_indexes[i];
+			zmbuf->desc_idx = head_idx;
 
 			/*
 			 * Pin lock the mbuf; we will check later to see
@@ -1286,9 +1170,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			vq->nr_zmbuf += 1;
 			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
 		}
-
-		if (unlikely(!!idesc))
-			free_ind_table(idesc);
 	}
 	vq->last_avail_idx += i;
 
--
2.14.4
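
For readers following the conversion, here is a minimal, self-contained
sketch of the buffer-vector walk the series moves the dequeue copy loop
onto: the descriptor chain is translated into an array of host-virtual
vectors up front, so the copy loop only advances vec_idx/buf_offset
instead of chasing desc->next and re-translating addresses mid-copy.
The type and function below are local stand-ins (not vhost.h's
struct buf_vector or the library's copy_desc_to_mbuf()); header
handling, zero copy and batched copies are left out.

#include <stdint.h>
#include <rte_common.h>
#include <rte_memcpy.h>

struct buf_vec_sketch {
	uint64_t buf_addr;	/* host virtual address (already translated) */
	uint64_t buf_iova;	/* guest/IO address, kept for logging/zero copy */
	uint32_t buf_len;
};

/* Copy dst_len bytes out of the chained vectors into a flat buffer. */
static int
copy_vectors_to_flat(const struct buf_vec_sketch *buf_vec, uint16_t nr_vec,
		     uint8_t *dst, uint32_t dst_len)
{
	uint16_t vec_idx = 0;
	uint32_t buf_offset = 0;
	uint32_t copied = 0;

	while (copied < dst_len) {
		uint32_t buf_avail, cpy_len;

		if (vec_idx >= nr_vec)
			return -1;	/* ran out of guest buffers */

		buf_avail = buf_vec[vec_idx].buf_len - buf_offset;
		cpy_len = RTE_MIN(buf_avail, dst_len - copied);

		rte_memcpy(dst + copied,
			   (void *)(uintptr_t)(buf_vec[vec_idx].buf_addr +
					       buf_offset),
			   cpy_len);

		copied += cpy_len;
		buf_offset += cpy_len;

		/* This buffer is exhausted, move on to the next vector. */
		if (buf_offset == buf_vec[vec_idx].buf_len) {
			vec_idx++;
			buf_offset = 0;
		}
	}

	return 0;
}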

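The dequeue loop above also leans on the shadow used ring brought in
earlier in the series: used entries are staged in a private array and
written back to the guest-visible ring in one pass, rather than dirtying
the shared ring per descriptor. A self-contained illustration of that
idea follows; the structures and the two helpers are simplified stand-ins
for update_shadow_used_ring() and the flush that follows it, assuming a
power-of-two ring size.

#include <stdint.h>

#define RING_SZ 256	/* power of two, stands in for vq->size */

struct used_elem { uint32_t id; uint32_t len; };

struct ring_sketch {
	struct used_elem shadow[RING_SZ];	/* private staging area */
	struct used_elem shared[RING_SZ];	/* ring shared with the guest */
	uint16_t shadow_used_idx;
	uint16_t used_idx;
};

/* Stage one used entry locally; nothing is written to shared memory yet. */
static void
stage_used(struct ring_sketch *r, uint32_t desc_idx, uint32_t len)
{
	r->shadow[r->shadow_used_idx].id = desc_idx;
	r->shadow[r->shadow_used_idx].len = len;
	r->shadow_used_idx++;
}

/* Flush every staged entry to the shared ring in a single pass. */
static void
flush_shadow(struct ring_sketch *r)
{
	uint16_t i;

	for (i = 0; i < r->shadow_used_idx; i++) {
		uint16_t slot = (r->used_idx + i) & (RING_SZ - 1);
		r->shared[slot] = r->shadow[i];
	}
	/*
	 * The real code issues a write barrier (rte_smp_wmb) here before
	 * publishing the new used index and notifying the guest.
	 */
	r->used_idx += r->shadow_used_idx;
	r->shadow_used_idx = 0;
}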

end of thread

Thread overview: 10+ messages
2018-07-06  7:04 [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Maxime Coquelin
2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path Maxime Coquelin
2018-07-06  7:59   ` Maxime Coquelin
2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors " Maxime Coquelin
     [not found]   ` <2DBBFF226F7CF64BAFCA79B681719D953A4EB9E3@SHSMSX101.ccr.corp.intel.com>
     [not found]     ` <E0CBA5A1980F1F408E1F28F9991B5B1D50D03E40@SHSMSX104.ccr.corp.intel.com>
2018-07-17  7:17       ` Wang, Yinan
2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 3/5] vhost: improve prefetching " Maxime Coquelin
2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 4/5] vhost: prefetch first descriptor " Maxime Coquelin
2018-07-06  7:04 ` [dpdk-dev] [PATCH v4 5/5] vhost: improve prefetching in enqueue path Maxime Coquelin
2018-07-09  1:39 ` [dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors Tiwei Bie
2018-07-09  7:01 ` Tiwei Bie
