DPDK patches and discussions
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: yuanhan.liu@linux.intel.com, huawei.xie@intel.com, dev@dpdk.org
Cc: vkaplans@redhat.com, mst@redhat.com, stephen@networkplumber.org
Subject: [dpdk-dev] [PATCH v2] vhost: Add indirect descriptors support to the TX path
Date: Fri, 23 Sep 2016 09:16:49 +0200	[thread overview]
Message-ID: <1474615009-26626-1-git-send-email-maxime.coquelin@redhat.com> (raw)

Indirect descriptors are usually supported by virtio-net devices,
allowing to dispatch a larger number of requests.

When the virtio device sends a packet using indirect descriptors,
only one slot is used in the ring, even for large packets.

The main effect is to improve the 0% packet loss benchmark.
A PVP benchmark using Moongen (64 bytes) on the TE, and testpmd
(fwd io for host, macswap for VM) on DUT shows a +50% gain for
zero loss.

On the downside, micro-benchmark using testpmd txonly in VM and
rxonly on host shows a loss between 1 and 4%.i But depending on
the needs, feature can be disabled at VM boot time by passing
indirect_desc=off argument to vhost-user device in Qemu.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
Changes since v2:
-----------------
 - Enrich commit message with figures
 - Rebased on top of dpdk-next-virtio's master
 - Add feature check to ensure we don't receive an indirect desc
if not supported by the virtio driver

 lib/librte_vhost/vhost.c      |  3 ++-
 lib/librte_vhost/virtio_net.c | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 46095c3..30bb0ce 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -65,7 +65,8 @@
 				(1ULL << VIRTIO_NET_F_CSUM)    | \
 				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
 				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
-				(1ULL << VIRTIO_NET_F_GUEST_TSO6))
+				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+				(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
 
 uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
 
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 8a151af..0f7dd81 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -679,8 +679,8 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
 }
 
 static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+		  uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
 		  struct rte_mempool *mbuf_pool)
 {
 	struct vring_desc *desc;
@@ -693,7 +693,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	/* A counter to avoid desc dead loop chain */
 	uint32_t nr_desc = 1;
 
-	desc = &vq->desc[desc_idx];
+	desc = &descs[desc_idx];
 	if (unlikely(desc->len < dev->vhost_hlen))
 		return -1;
 
@@ -711,7 +711,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 */
 	if (likely((desc->len == dev->vhost_hlen) &&
 		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-		desc = &vq->desc[desc->next];
+		desc = &descs[desc->next];
 
 		desc_addr = gpa_to_vva(dev, desc->addr);
 		if (unlikely(!desc_addr))
@@ -747,10 +747,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
 				break;
 
-			if (unlikely(desc->next >= vq->size ||
-				     ++nr_desc > vq->size))
+			if (unlikely(desc->next >= max_desc ||
+				     ++nr_desc > max_desc))
 				return -1;
-			desc = &vq->desc[desc->next];
+			desc = &descs[desc->next];
 
 			desc_addr = gpa_to_vva(dev, desc->addr);
 			if (unlikely(!desc_addr))
@@ -878,19 +878,39 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
+		struct vring_desc *desc;
+		uint16_t sz, idx;
 		int err;
 
 		if (likely(i + 1 < count))
 			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
 
+		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+			if (unlikely(!(dev->features &
+					(1ULL << VIRTIO_RING_F_INDIRECT_DESC)))) {
+				RTE_LOG(ERR, VHOST_DATA,
+						"Indirect desc but feature not negotiated.\n");
+				break;
+			}
+
+			desc = (struct vring_desc *)gpa_to_vva(dev,
+					vq->desc[desc_indexes[i]].addr);
+			rte_prefetch0(desc);
+			sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+			idx = 0;
+		} else {
+			desc = vq->desc;
+			sz = vq->size;
+			idx = desc_indexes[i];
+		}
+
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
 			break;
 		}
-		err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
-					mbuf_pool);
+		err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
 			break;
-- 
2.7.4

             reply	other threads:[~2016-09-23  7:16 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-23  7:16 Maxime Coquelin [this message]
2016-09-23  7:29 ` Yuanhan Liu
2016-09-23  7:33   ` Maxime Coquelin
2016-09-23 18:24 ` Stephen Hemminger
2016-09-23 18:31   ` Michael S. Tsirkin
2016-09-23 20:28     ` Stephen Hemminger
2016-09-25  1:02       ` Michael S. Tsirkin
2016-09-25  1:50         ` Stephen Hemminger
2016-09-25  1:53           ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1474615009-26626-1-git-send-email-maxime.coquelin@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=dev@dpdk.org \
    --cc=huawei.xie@intel.com \
    --cc=mst@redhat.com \
    --cc=stephen@networkplumber.org \
    --cc=vkaplans@redhat.com \
    --cc=yuanhan.liu@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).