From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <jfreiman@redhat.com>
Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28])
 by dpdk.org (Postfix) with ESMTP id A245F37B0
 for <dev@dpdk.org>; Fri,  5 May 2017 15:57:27 +0200 (CEST)
Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com
 [10.5.11.12])
 (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))
 (No client certificate requested)
 by mx1.redhat.com (Postfix) with ESMTPS id EF00E23E6C5;
 Fri,  5 May 2017 13:57:26 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com EF00E23E6C5
Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com;
 dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com;
 spf=pass smtp.mailfrom=jfreiman@redhat.com
DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com EF00E23E6C5
Received: from virtlab417.ml3.eng.bos.redhat.com
 (virtlab417.ml3.eng.bos.redhat.com [10.19.176.175])
 by smtp.corp.redhat.com (Postfix) with ESMTP id A8857785E5;
 Fri,  5 May 2017 13:57:26 +0000 (UTC)
From: Jens Freimann <jfreiman@redhat.com>
To: yuanhan.liu@linux.intel.com
Cc: dev@dpdk.org
Date: Fri,  5 May 2017 09:57:17 -0400
Message-Id: <1493992642-52756-7-git-send-email-jfreiman@redhat.com>
In-Reply-To: <1493992642-52756-1-git-send-email-jfreiman@redhat.com>
References: <1493992642-52756-1-git-send-email-jfreiman@redhat.com>
X-Scanned-By: MIMEDefang 2.79 on 10.5.11.12
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
 (mx1.redhat.com [10.5.110.29]); Fri, 05 May 2017 13:57:27 +0000 (UTC)
Subject: [dpdk-dev] [RFC PATCH 06/11] vhost: implement virtio 1.1 dequeue
	path
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Fri, 05 May 2017 13:57:28 -0000

From: Yuanhan Liu <yuanhan.liu@linux.intel.com>

Build test only; haven't tested it yet

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Jens Freimann <jfreiman@redhat.com>
---
 lib/librte_vhost/virtio-1.1.h |  23 ++++++
 lib/librte_vhost/virtio_net.c | 181 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 lib/librte_vhost/virtio-1.1.h

diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h
new file mode 100644
index 0000000..4241d0a
--- /dev/null
+++ b/lib/librte_vhost/virtio-1.1.h
@@ -0,0 +1,23 @@
+#ifndef __VIRTIO_1_1_H
+#define __VIRTIO_1_1_H
+
+#define __le64	uint64_t
+#define __le32	uint32_t
+#define __le16	uint16_t
+
+#define VRING_DESC_F_NEXT	1
+#define VRING_DESC_F_WRITE	2
+#define VRING_DESC_F_INDIRECT	4
+
+#define BATCH_NOT_FIRST 0x0010
+#define BATCH_NOT_LAST  0x0020
+#define DESC_HW		0x0080
+
+struct vring_desc_1_1 {
+        __le64 addr;
+        __le32 len;
+        __le16 index;
+        __le16 flags;
+};
+
+#endif /* __VIRTIO_1_1_H */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 48219e0..fd6f200 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -46,6 +46,7 @@
 #include <rte_arp.h>
 
 #include "vhost.h"
+#include "virtio-1.1.h"
 
 #define MAX_PKT_BURST 32
 
@@ -973,6 +974,183 @@ static inline bool __attribute__((always_inline))
 	return true;
 }
 
+static inline uint16_t
+dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+	     struct vring_desc_1_1 *descs)
+{
+	struct vring_desc_1_1 *desc;
+	uint64_t desc_addr;
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct rte_mbuf *cur = m, *prev = m;
+	struct virtio_net_hdr *hdr = NULL;
+	uint16_t head_idx = vq->last_used_idx;
+
+	desc = &descs[(head_idx++) & (vq->size - 1)];
+	if (unlikely((desc->len < dev->vhost_hlen)) ||
+			(desc->flags & VRING_DESC_F_INDIRECT))
+		return -1;
+
+	desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+	if (unlikely(!desc_addr))
+		return -1;
+
+	if (virtio_net_with_host_offload(dev)) {
+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+		rte_prefetch0(hdr);
+	}
+
+	/*
+	 * A virtio driver normally uses at least 2 desc buffers
+	 * for Tx: the first for storing the header, and others
+	 * for storing the data.
+	 */
+	if (likely((desc->len == dev->vhost_hlen) &&
+		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+		desc->flags = 0;
+
+		desc = &descs[(head_idx++) & (vq->size - 1)];
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+			return -1;
+
+		desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+		if (unlikely(!desc_addr))
+			return -1;
+
+		desc_offset = 0;
+		desc_avail  = desc->len;
+	} else {
+		desc_avail  = desc->len - dev->vhost_hlen;
+		desc_offset = dev->vhost_hlen;
+	}
+
+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+	mbuf_offset = 0;
+	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+	while (1) {
+		uint64_t hpa;
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+		/*
+		 * A desc buf might across two host physical pages that are
+		 * not continuous. In such case (gpa_to_hpa returns 0), data
+		 * will be copied even though zero copy is enabled.
+		 */
+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+					desc->addr + desc_offset, cpy_len)))) {
+			cur->data_len = cpy_len;
+			cur->data_off = 0;
+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_physaddr = hpa;
+
+			/*
+			 * In zero copy mode, one mbuf can only reference data
+			 * for one or partial of one desc buff.
+			 */
+			mbuf_avail = cpy_len;
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+							   mbuf_offset),
+				(void *)((uintptr_t)(desc_addr + desc_offset)),
+				cpy_len);
+		}
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+
+		/* This desc reaches to its end, get the next one */
+		if (desc_avail == 0) {
+			desc->flags = 0;
+
+			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+				break;
+
+			desc = &descs[(head_idx++) & (vq->size - 1)];
+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+				return -1;
+
+			desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+			if (unlikely(!desc_addr))
+				return -1;
+
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+			desc_offset = 0;
+			desc_avail  = desc->len;
+
+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+		}
+
+		/*
+		 * This mbuf reaches to its end, get a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(cur == NULL)) {
+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
+					"allocate memory for mbuf.\n");
+				return -1;
+			}
+
+			prev->next = cur;
+			prev->data_len = mbuf_offset;
+			m->nb_segs += 1;
+			m->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+	}
+	desc->flags = 0;
+
+	prev->data_len = mbuf_offset;
+	m->pkt_len    += mbuf_offset;
+
+	if (hdr)
+		vhost_dequeue_offload(hdr, m);
+
+	vq->last_used_idx = head_idx;
+
+	return 0;
+}
+
+static inline uint16_t
+vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
+			uint16_t count)
+{
+	uint16_t i;
+	uint16_t idx;
+	struct vring_desc_1_1 *desc = vq->desc_1_1;
+
+	for (i = 0; i < count; i++) {
+		idx = vq->last_used_idx & (vq->size - 1);
+		if (!(desc[idx].flags & DESC_HW))
+			break;
+
+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(pkts[i] == NULL)) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			break;
+		}
+
+		dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
+	}
+
+	return i;
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -1000,6 +1178,9 @@ static inline bool __attribute__((always_inline))
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_VERSION_1_1))
+		return vhost_dequeue_burst_1_1(dev, vq, mbuf_pool, pkts, count);
+
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
 		int nr_updated = 0;
-- 
1.8.3.1