DPDK patches and discussions
* [dpdk-dev] [PATCH 0/2] Vhost: unify receive paths
@ 2018-05-28 16:23 Maxime Coquelin
  2018-05-28 16:23 ` [dpdk-dev] [PATCH 1/2] vhost: unify Rx mergeable and non-mergeable paths Maxime Coquelin
  2018-05-28 16:23 ` [dpdk-dev] [PATCH 2/2] vhost: improve batched copies performance Maxime Coquelin
  0 siblings, 2 replies; 4+ messages in thread
From: Maxime Coquelin @ 2018-05-28 16:23 UTC (permalink / raw)
  To: dev, tiwei.bie, zhihong.wang; +Cc: Maxime Coquelin

Hi,

This series is preliminary work to ease the integration of
packed ring layout support. But even without packed ring
layout, the result is positive.

The first patch unifies both paths, and the second one is a small
optimization that avoids copying the batch_copy_nb_elems VQ field
to/from the stack.
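
For reference, the change at the enqueue entry point boils down to
dropping the per-feature dispatch; a simplified sketch of the hunk in
patch 1:

	/* Before: two enqueue paths, selected on the mergeable Rx feature bit */
	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
	else
		return virtio_dev_rx(dev, queue_id, pkts, count);

	/* After: a single path handles both cases */
	return virtio_dev_merge_rx(dev, queue_id, pkts, count);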

With the series applied, I get a modest performance gain for both
the mergeable and non-mergeable cases, and the reduction of about
300 LoC is non-negligible maintenance-wise.

Rx-mrg=off benchmarks:

+------------+-------+-------------+-------------+----------+
|    Run     |  PVP  | Guest->Host | Host->Guest | Loopback |
+------------+-------+-------------+-------------+----------+
| v18.05-rc5 | 14.47 |       16.64 |       17.57 |    13.15 |
| + series   | 14.87 |       16.86 |       17.70 |    13.30 |
+------------+-------+-------------+-------------+----------+

Rx-mrg=on benchmarks:

+------------+------+-------------+-------------+----------+
|    Run     | PVP  | Guest->Host | Host->Guest | Loopback |
+------------+------+-------------+-------------+----------+
| v18.05-rc5 | 9.38 |       13.78 |       16.70 |    12.79 |
| + series   | 9.38 |       13.80 |       17.49 |    13.36 |
+------------+------+-------------+-------------+----------+

Note: Even without my series, the guest->host benchmark with
mergeable buffers enabled looks suspicious, as it should in theory
be almost identical to the case where Rx mergeable buffers are
disabled. To be investigated...

Maxime Coquelin (2):
  vhost: unify Rx mergeable and non-mergeable paths
  vhost: improve batched copies performance

 lib/librte_vhost/virtio_net.c | 366 ++++--------------------------------------
 1 file changed, 32 insertions(+), 334 deletions(-)

-- 
2.14.3


* [dpdk-dev] [PATCH 1/2] vhost: unify Rx mergeable and non-mergeable paths
  2018-05-28 16:23 [dpdk-dev] [PATCH 0/2] Vhost: unify receive paths Maxime Coquelin
@ 2018-05-28 16:23 ` Maxime Coquelin
  2018-05-28 18:33   ` Maxime Coquelin
  2018-05-28 16:23 ` [dpdk-dev] [PATCH 2/2] vhost: improve batched copies performance Maxime Coquelin
  1 sibling, 1 reply; 4+ messages in thread
From: Maxime Coquelin @ 2018-05-28 16:23 UTC (permalink / raw)
  To: dev, tiwei.bie, zhihong.wang; +Cc: Maxime Coquelin

This patch reworks the vhost enqueue path so that a single code path
is used for both the Rx mergeable and non-mergeable cases.
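
Condensed from the hunks below, the mechanism is: a small helper tests
the negotiated feature bit once, and the single path adapts the few
places where behaviour differs, for example how many descriptor chains
may be used per packet:

	static __rte_always_inline bool
	rxvq_is_mergeable(struct virtio_net *dev)
	{
		/* bitwise test of the negotiated feature bit */
		return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
	}

	/* In reserve_avail_buf_mergeable(): a non-mergeable Rx queue may
	 * use only one descriptor chain per packet, a mergeable one up
	 * to vq->size. */
	if (rxvq_is_mergeable(dev))
		max_tries = vq->size;
	else
		max_tries = 1;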

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 337 +++---------------------------------------
 1 file changed, 18 insertions(+), 319 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 76ec5f089..c5237f853 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -25,6 +25,12 @@
 
 #define MAX_BATCH_LEN 256
 
+static  __rte_always_inline bool
+rxvq_is_mergeable(struct virtio_net *dev)
+{
+	return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
+}
+
 static bool
 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
 {
@@ -154,7 +160,7 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
 		(var) = (val);			\
 } while (0)
 
-static void
+static __rte_always_inline void
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
 	uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
@@ -215,317 +221,6 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 	}
 }
 
-static __rte_always_inline int
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct vring_desc *descs, struct rte_mbuf *m,
-		  uint16_t desc_idx, uint32_t size)
-{
-	uint32_t desc_avail, desc_offset;
-	uint32_t mbuf_avail, mbuf_offset;
-	uint32_t cpy_len;
-	uint64_t desc_chunck_len;
-	struct vring_desc *desc;
-	uint64_t desc_addr, desc_gaddr;
-	/* A counter to avoid desc dead loop chain */
-	uint16_t nr_desc = 1;
-	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
-	uint16_t copy_nb = vq->batch_copy_nb_elems;
-	int error = 0;
-
-	desc = &descs[desc_idx];
-	desc_chunck_len = desc->len;
-	desc_gaddr = desc->addr;
-	desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
-					&desc_chunck_len, VHOST_ACCESS_RW);
-	/*
-	 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-	 * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-	 * otherwise stores offset on the stack instead of in a register.
-	 */
-	if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
-		error = -1;
-		goto out;
-	}
-
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-	if (likely(desc_chunck_len >= dev->vhost_hlen)) {
-		virtio_enqueue_offload(m,
-				(struct virtio_net_hdr *)(uintptr_t)desc_addr);
-		PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-		vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
-	} else {
-		struct virtio_net_hdr vnet_hdr;
-		uint64_t remain = dev->vhost_hlen;
-		uint64_t len;
-		uint64_t src = (uint64_t)(uintptr_t)&vnet_hdr, dst;
-		uint64_t guest_addr = desc_gaddr;
-
-		virtio_enqueue_offload(m, &vnet_hdr);
-
-		while (remain) {
-			len = remain;
-			dst = vhost_iova_to_vva(dev, vq, guest_addr,
-					&len, VHOST_ACCESS_RW);
-			if (unlikely(!dst || !len)) {
-				error = -1;
-				goto out;
-			}
-
-			rte_memcpy((void *)(uintptr_t)dst,
-					(void *)(uintptr_t)src, len);
-
-			PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
-			vhost_log_cache_write(dev, vq, guest_addr, len);
-			remain -= len;
-			guest_addr += len;
-			src += len;
-		}
-	}
-
-	desc_avail  = desc->len - dev->vhost_hlen;
-	if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
-		desc_chunck_len = desc_avail;
-		desc_gaddr = desc->addr + dev->vhost_hlen;
-		desc_addr = vhost_iova_to_vva(dev,
-				vq, desc_gaddr,
-				&desc_chunck_len,
-				VHOST_ACCESS_RW);
-		if (unlikely(!desc_addr)) {
-			error = -1;
-			goto out;
-		}
-
-		desc_offset = 0;
-	} else {
-		desc_offset = dev->vhost_hlen;
-		desc_chunck_len -= dev->vhost_hlen;
-	}
-
-	mbuf_avail  = rte_pktmbuf_data_len(m);
-	mbuf_offset = 0;
-	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current mbuf, fetch next */
-		if (mbuf_avail == 0) {
-			m = m->next;
-
-			mbuf_offset = 0;
-			mbuf_avail  = rte_pktmbuf_data_len(m);
-		}
-
-		/* done with current desc buf, fetch next */
-		if (desc_avail == 0) {
-			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
-				/* Room in vring buffer is not enough */
-				error = -1;
-				goto out;
-			}
-			if (unlikely(desc->next >= size || ++nr_desc > size)) {
-				error = -1;
-				goto out;
-			}
-
-			desc = &descs[desc->next];
-			desc_chunck_len = desc->len;
-			desc_gaddr = desc->addr;
-			desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
-							&desc_chunck_len,
-							VHOST_ACCESS_RW);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-
-			desc_offset = 0;
-			desc_avail  = desc->len;
-		} else if (unlikely(desc_chunck_len == 0)) {
-			desc_chunck_len = desc_avail;
-			desc_gaddr += desc_offset;
-			desc_addr = vhost_iova_to_vva(dev,
-					vq, desc_gaddr,
-					&desc_chunck_len, VHOST_ACCESS_RW);
-			if (unlikely(!desc_addr)) {
-				error = -1;
-				goto out;
-			}
-			desc_offset = 0;
-		}
-
-		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
-		if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
-			rte_memcpy((void *)((uintptr_t)(desc_addr +
-							desc_offset)),
-				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-				cpy_len);
-			vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
-					cpy_len);
-			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-				     cpy_len, 0);
-		} else {
-			batch_copy[copy_nb].dst =
-				(void *)((uintptr_t)(desc_addr + desc_offset));
-			batch_copy[copy_nb].src =
-				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
-			batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
-			batch_copy[copy_nb].len = cpy_len;
-			copy_nb++;
-		}
-
-		mbuf_avail  -= cpy_len;
-		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_offset += cpy_len;
-		desc_chunck_len -= cpy_len;
-	}
-
-out:
-	vq->batch_copy_nb_elems = copy_nb;
-
-	return error;
-}
-
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are successfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
- */
-static __rte_always_inline uint32_t
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
-	      struct rte_mbuf **pkts, uint32_t count)
-{
-	struct vhost_virtqueue *vq;
-	uint16_t avail_idx, free_entries, start_idx;
-	uint16_t desc_indexes[MAX_PKT_BURST];
-	struct vring_desc *descs;
-	uint16_t used_idx;
-	uint32_t i, sz;
-
-	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	vq = dev->virtqueue[queue_id];
-
-	rte_spinlock_lock(&vq->access_lock);
-
-	if (unlikely(vq->enabled == 0))
-		goto out_access_unlock;
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
-	if (unlikely(vq->access_ok == 0)) {
-		if (unlikely(vring_translate(dev, vq) < 0)) {
-			count = 0;
-			goto out;
-		}
-	}
-
-	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-	start_idx = vq->last_used_idx;
-	free_entries = avail_idx - start_idx;
-	count = RTE_MIN(count, free_entries);
-	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-	if (count == 0)
-		goto out;
-
-	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
-		dev->vid, start_idx, start_idx + count);
-
-	vq->batch_copy_nb_elems = 0;
-
-	/* Retrieve all of the desc indexes first to avoid caching issues. */
-	rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
-	for (i = 0; i < count; i++) {
-		used_idx = (start_idx + i) & (vq->size - 1);
-		desc_indexes[i] = vq->avail->ring[used_idx];
-		vq->used->ring[used_idx].id = desc_indexes[i];
-		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
-					       dev->vhost_hlen;
-		vhost_log_cache_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
-	}
-
-	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-	for (i = 0; i < count; i++) {
-		struct vring_desc *idesc = NULL;
-		uint16_t desc_idx = desc_indexes[i];
-		int err;
-
-		if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
-			uint64_t dlen = vq->desc[desc_idx].len;
-			descs = (struct vring_desc *)(uintptr_t)
-				vhost_iova_to_vva(dev,
-						vq, vq->desc[desc_idx].addr,
-						&dlen, VHOST_ACCESS_RO);
-			if (unlikely(!descs)) {
-				count = i;
-				break;
-			}
-
-			if (unlikely(dlen < vq->desc[desc_idx].len)) {
-				/*
-				 * The indirect desc table is not contiguous
-				 * in process VA space, we have to copy it.
-				 */
-				idesc = alloc_copy_ind_table(dev, vq,
-							&vq->desc[desc_idx]);
-				if (unlikely(!idesc))
-					break;
-
-				descs = idesc;
-			}
-
-			desc_idx = 0;
-			sz = vq->desc[desc_idx].len / sizeof(*descs);
-		} else {
-			descs = vq->desc;
-			sz = vq->size;
-		}
-
-		err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
-		if (unlikely(err)) {
-			count = i;
-			free_ind_table(idesc);
-			break;
-		}
-
-		if (i + 1 < count)
-			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
-
-		if (unlikely(!!idesc))
-			free_ind_table(idesc);
-	}
-
-	do_data_copy_enqueue(dev, vq);
-
-	rte_smp_wmb();
-
-	vhost_log_cache_sync(dev, vq);
-
-	*(volatile uint16_t *)&vq->used->idx += count;
-	vq->last_used_idx += count;
-	vhost_log_used_vring(dev, vq,
-		offsetof(struct vring_used, idx),
-		sizeof(vq->used->idx));
-
-	vhost_vring_call(dev, vq);
-out:
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
-out_access_unlock:
-	rte_spinlock_unlock(&vq->access_lock);
-
-	return count;
-}
-
 static __rte_always_inline int
 fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			 uint32_t avail_idx, uint32_t *vec_idx,
@@ -602,7 +297,7 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 {
 	uint16_t cur_idx;
 	uint32_t vec_idx = 0;
-	uint16_t tries = 0;
+	uint16_t max_tries, tries = 0;
 
 	uint16_t head_idx = 0;
 	uint16_t len = 0;
@@ -610,6 +305,11 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	*num_buffers = 0;
 	cur_idx  = vq->last_avail_idx;
 
+	if (rxvq_is_mergeable(dev))
+		max_tries = vq->size;
+	else
+		max_tries = 1;
+
 	while (size > 0) {
 		if (unlikely(cur_idx == avail_head))
 			return -1;
@@ -630,7 +330,7 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		 * can't get enough buf, it means something abnormal
 		 * happened.
 		 */
-		if (unlikely(tries >= vq->size))
+		if (unlikely(tries > max_tries))
 			return -1;
 	}
 
@@ -748,7 +448,9 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 		if (hdr_addr) {
 			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
-			ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
+			if (rxvq_is_mergeable(dev))
+				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
+						num_buffers);
 
 			if (unlikely(hdr == &tmp_hdr)) {
 				uint64_t len;
@@ -923,10 +625,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 		return 0;
 	}
 
-	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
+	return virtio_dev_merge_rx(dev, queue_id, pkts, count);
 }
 
 static inline bool
-- 
2.14.3


* [dpdk-dev] [PATCH 2/2] vhost: improve batched copies performance
  2018-05-28 16:23 [dpdk-dev] [PATCH 0/2] Vhost: unify receive paths Maxime Coquelin
  2018-05-28 16:23 ` [dpdk-dev] [PATCH 1/2] vhost: unify Rx mergeable and non-mergeable paths Maxime Coquelin
@ 2018-05-28 16:23 ` Maxime Coquelin
  1 sibling, 0 replies; 4+ messages in thread
From: Maxime Coquelin @ 2018-05-28 16:23 UTC (permalink / raw)
  To: dev, tiwei.bie, zhihong.wang; +Cc: Maxime Coquelin

Instead of copying batch_copy_nb_elems to a local variable on the
stack and back, this patch uses the virtqueue field directly.

A small performance gain of 3% is seen when running the PVP
benchmark.
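
A condensed sketch of the change in the copy helpers (field names as
used in the diff below):

	/* Before: the counter is mirrored on the stack and written back on exit */
	uint16_t copy_nb = vq->batch_copy_nb_elems;
	/* ... */
	batch_copy[copy_nb].len = cpy_len;
	copy_nb++;
	/* ... */
	vq->batch_copy_nb_elems = copy_nb;

	/* After: the virtqueue field is used directly */
	batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
	vq->batch_copy_nb_elems++;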

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index c5237f853..d29c61ff8 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -352,7 +352,6 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mbuf *hdr_mbuf;
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
-	uint16_t copy_nb = vq->batch_copy_nb_elems;
 	int error = 0;
 
 	if (unlikely(m == NULL)) {
@@ -493,7 +492,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
 
-		if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
+		if (likely(cpy_len > MAX_BATCH_LEN ||
+					vq->batch_copy_nb_elems >= vq->size)) {
 			rte_memcpy((void *)((uintptr_t)(desc_addr +
 							desc_offset)),
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
@@ -503,13 +503,14 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
 				cpy_len, 0);
 		} else {
-			batch_copy[copy_nb].dst =
+			batch_copy[vq->batch_copy_nb_elems].dst =
 				(void *)((uintptr_t)(desc_addr + desc_offset));
-			batch_copy[copy_nb].src =
+			batch_copy[vq->batch_copy_nb_elems].src =
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
-			batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
-			batch_copy[copy_nb].len = cpy_len;
-			copy_nb++;
+			batch_copy[vq->batch_copy_nb_elems].log_addr =
+				desc_gaddr + desc_offset;
+			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
+			vq->batch_copy_nb_elems++;
 		}
 
 		mbuf_avail  -= cpy_len;
@@ -520,7 +521,6 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	}
 
 out:
-	vq->batch_copy_nb_elems = copy_nb;
 
 	return error;
 }
@@ -766,7 +766,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	/* A counter to avoid desc dead loop chain */
 	uint32_t nr_desc = 1;
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
-	uint16_t copy_nb = vq->batch_copy_nb_elems;
 	int error = 0;
 
 	desc = &descs[desc_idx];
@@ -905,7 +904,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			mbuf_avail = cpy_len;
 		} else {
 			if (likely(cpy_len > MAX_BATCH_LEN ||
-				   copy_nb >= vq->size ||
+				   vq->batch_copy_nb_elems >= vq->size ||
 				   (hdr && cur == m) ||
 				   desc->len != desc_chunck_len)) {
 				rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
@@ -914,14 +913,15 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 								desc_offset)),
 					   cpy_len);
 			} else {
-				batch_copy[copy_nb].dst =
+				batch_copy[vq->batch_copy_nb_elems].dst =
 					rte_pktmbuf_mtod_offset(cur, void *,
 								mbuf_offset);
-				batch_copy[copy_nb].src =
+				batch_copy[vq->batch_copy_nb_elems].src =
 					(void *)((uintptr_t)(desc_addr +
 							     desc_offset));
-				batch_copy[copy_nb].len = cpy_len;
-				copy_nb++;
+				batch_copy[vq->batch_copy_nb_elems].len =
+					cpy_len;
+				vq->batch_copy_nb_elems++;
 			}
 		}
 
@@ -1015,7 +1015,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		vhost_dequeue_offload(hdr, m);
 
 out:
-	vq->batch_copy_nb_elems = copy_nb;
 
 	return error;
 }
-- 
2.14.3


* Re: [dpdk-dev] [PATCH 1/2] vhost: unify Rx mergeable and non-mergeable paths
  2018-05-28 16:23 ` [dpdk-dev] [PATCH 1/2] vhost: unify Rx mergeable and non-mergeable paths Maxime Coquelin
@ 2018-05-28 18:33   ` Maxime Coquelin
  0 siblings, 0 replies; 4+ messages in thread
From: Maxime Coquelin @ 2018-05-28 18:33 UTC (permalink / raw)
  To: dev, tiwei.bie, zhihong.wang



On 05/28/2018 06:23 PM, Maxime Coquelin wrote:
> @@ -602,7 +297,7 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,

Just noticed I forgot to remove "mergeable" from the function names
here and below. I'll fix this in the next revision after having
collected some feedback.
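
(For illustration, the kind of rename meant here, with hypothetical
final names:)

	reserve_avail_buf_mergeable() -> reserve_avail_buf()
	copy_mbuf_to_desc_mergeable() -> copy_mbuf_to_desc()
	virtio_dev_merge_rx()         -> virtio_dev_rx()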

>   {
>   	uint16_t cur_idx;
>   	uint32_t vec_idx = 0;
> -	uint16_t tries = 0;
> +	uint16_t max_tries, tries = 0;
>   
>   	uint16_t head_idx = 0;
>   	uint16_t len = 0;
> @@ -610,6 +305,11 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   	*num_buffers = 0;
>   	cur_idx  = vq->last_avail_idx;
>   
> +	if (rxvq_is_mergeable(dev))
> +		max_tries = vq->size;
> +	else
> +		max_tries = 1;
> +
>   	while (size > 0) {
>   		if (unlikely(cur_idx == avail_head))
>   			return -1;
> @@ -630,7 +330,7 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   		 * can't get enough buf, it means something abnormal
>   		 * happened.
>   		 */
> -		if (unlikely(tries >= vq->size))
> +		if (unlikely(tries > max_tries))
>   			return -1;
>   	}
>   
> @@ -748,7 +448,9 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   
>   		if (hdr_addr) {
>   			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
> -			ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
> +			if (rxvq_is_mergeable(dev))
> +				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
> +						num_buffers);
>   
>   			if (unlikely(hdr == &tmp_hdr)) {
>   				uint64_t len;
> @@ -923,10 +625,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
>   		return 0;
>   	}
>   
> -	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
> -		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
> -	else
> -		return virtio_dev_rx(dev, queue_id, pkts, count);
> +	return virtio_dev_merge_rx(dev, queue_id, pkts, count);
>   }
>   
>   static inline bool
> -- 2.14.3


