DPDK patches and discussions
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: huawei.xie@intel.com, yuanhan.liu@linux.intel.com
Cc: dev@dpdk.org, vkaplans@redhat.com, mst@redhat.com,
	Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [RFC] vhost: Add indirect descriptors support to the TX path
Date: Tue, 12 Jul 2016 16:32:12 +0200	[thread overview]
Message-ID: <1468333932-26509-1-git-send-email-maxime.coquelin@redhat.com> (raw)

Indirect descriptors are usually supported by virtio-net devices,
allowing them to dispatch a large number of large requests.

When the virtio device sends a packet using indirect descriptors,
only one slot is used in the ring, even for large packets.
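
For reference, this is the virtio ring descriptor layout involved
(per the virtio spec, reproduced here only for illustration):

    struct vring_desc {
            uint64_t addr;  /* guest-physical address of the buffer */
            uint32_t len;   /* length of the buffer */
            uint16_t flags; /* VRING_DESC_F_NEXT/_WRITE/_INDIRECT */
            uint16_t next;  /* index of next descriptor in the chain */
    };

When VRING_DESC_F_INDIRECT is set, addr and len describe a separate
table of vring_desc entries holding the whole chain, which is why the
chain consumes only one slot in the main ring.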

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
I have two questions regarding the implementation of this feature:

1. Should I add a check to ensure the indirect feature is supported
(i.e. the negotiation succeeded) when an indirect descriptor is
encountered?

2. Should I check in copy_desc_to_mbuf() that we don't have a nested
indirect descriptor?

Both of these sanity checks are recommended by the virtio spec, but
since they would be performed in the hot path, they may introduce a
performance penalty.

Note that the first check is not done in the kernel vhost driver,
whereas the second one is.
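
For discussion, a rough sketch of what these two checks could look
like (assuming dev->features holds the negotiated feature bits, as
done elsewhere in librte_vhost; this sketch is not part of the patch
below):

	/* 1. Fail when an indirect descriptor shows up although the
	 * feature was not negotiated.
	 */
	if (unlikely((desc->flags & VRING_DESC_F_INDIRECT) &&
		     !(dev->features &
		       (1ULL << VIRTIO_RING_F_INDIRECT_DESC))))
		return -1;

	/* 2. Inside an indirect table, fail on a nested indirect
	 * descriptor, which the spec forbids.
	 */
	if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
		return -1;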

Cheers,
Maxime
---
 lib/librte_vhost/vhost_rxtx.c | 31 ++++++++++++++++++++++---------
 lib/librte_vhost/virtio-net.c |  3 ++-
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 15ca956..74cbc75 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -669,8 +669,8 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
 }
 
 static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+		  uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
 		  struct rte_mempool *mbuf_pool)
 {
 	struct vring_desc *desc;
@@ -683,7 +683,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	/* A counter to avoid desc dead loop chain */
 	uint32_t nr_desc = 1;
 
-	desc = &vq->desc[desc_idx];
+	desc = &descs[desc_idx];
 	if (unlikely(desc->len < dev->vhost_hlen))
 		return -1;
 
@@ -698,7 +698,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 */
 	if (likely((desc->len == dev->vhost_hlen) &&
 		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-		desc = &vq->desc[desc->next];
+		desc = &descs[desc->next];
 
 		desc_addr = gpa_to_vva(dev, desc->addr);
 		rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -731,10 +731,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
 				break;
 
-			if (unlikely(desc->next >= vq->size ||
-				     ++nr_desc >= vq->size))
+			if (unlikely(desc->next >= max_desc ||
+				     ++nr_desc >= max_desc))
 				return -1;
-			desc = &vq->desc[desc->next];
+			desc = &descs[desc->next];
 
 			desc_addr = gpa_to_vva(dev, desc->addr);
 			rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -859,19 +859,32 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
+		struct vring_desc *desc;
+		uint16_t size, idx;
 		int err;
 
 		if (likely(i + 1 < count))
 			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
 
+		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+			desc = (struct vring_desc *)gpa_to_vva(dev,
+					vq->desc[desc_indexes[i]].addr);
+			rte_prefetch0(desc);
+			size = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+			idx = 0;
+		} else {
+			desc = vq->desc;
+			size = vq->size;
+			idx = desc_indexes[i];
+		}
+
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
 			break;
 		}
-		err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
-					mbuf_pool);
+		err = copy_desc_to_mbuf(dev, desc, size, pkts[i], idx, mbuf_pool);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
 			break;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 1785695..0304812 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -76,7 +76,8 @@ struct virtio_net_device_ops const *notify_ops;
 				(1ULL << VIRTIO_NET_F_CSUM)    | \
 				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
 				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
-				(1ULL << VIRTIO_NET_F_GUEST_TSO6))
+				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+				(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
 
 static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;

-- 
2.7.4

