From: Jens Freimann <jfreiman@redhat.com>
To: yuanhan.liu@linux.intel.com
Cc: dev@dpdk.org
Subject: [dpdk-dev] [RFC PATCH 06/11] vhost: implement virtio 1.1 dequeue path
Date: Fri, 5 May 2017 09:57:17 -0400 [thread overview]
Message-ID: <1493992642-52756-7-git-send-email-jfreiman@redhat.com> (raw)
In-Reply-To: <1493992642-52756-1-git-send-email-jfreiman@redhat.com>
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Build test only; haven't tested it yet
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Jens Freimann <jfreiman@redhat.com>
---
lib/librte_vhost/virtio-1.1.h | 23 ++++++
lib/librte_vhost/virtio_net.c | 181 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 204 insertions(+)
create mode 100644 lib/librte_vhost/virtio-1.1.h
diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h
new file mode 100644
index 0000000..4241d0a
--- /dev/null
+++ b/lib/librte_vhost/virtio-1.1.h
@@ -0,0 +1,23 @@
+/* Minimal virtio 1.1 (packed ring) definitions for the vhost library. */
+#ifndef VIRTIO_1_1_H
+#define VIRTIO_1_1_H
+
+/* Aliases mirroring the Linux kernel's little-endian types; vhost runs
+ * on little-endian hosts here, so plain fixed-width types suffice. */
+#define __le64 uint64_t
+#define __le32 uint32_t
+#define __le16 uint16_t
+
+/* Per-descriptor flags, same values as the virtio 1.0 split ring. */
+#define VRING_DESC_F_NEXT 1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT 4
+
+/* Packed-ring batching/ownership bits (RFC experimental values).
+ * DESC_HW set means the descriptor is owned by the device (vhost). */
+#define BATCH_NOT_FIRST 0x0010
+#define BATCH_NOT_LAST 0x0020
+#define DESC_HW 0x0080
+
+/* One packed-ring descriptor: guest-physical buffer address and length,
+ * the driver-assigned buffer index, and the flags above. */
+struct vring_desc_1_1 {
+ __le64 addr;
+ __le32 len;
+ __le16 index;
+ __le16 flags;
+};
+
+#endif /* VIRTIO_1_1_H */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 48219e0..fd6f200 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -46,6 +46,7 @@
#include <rte_arp.h>
#include "vhost.h"
+#include "virtio-1.1.h"
#define MAX_PKT_BURST 32
@@ -973,6 +974,183 @@ static inline bool __attribute__((always_inline))
return true;
}
+/*
+ * Copy the packet starting at vq->last_used_idx from the virtio 1.1
+ * descriptor ring @descs into the mbuf chain headed by @m, allocating
+ * continuation mbufs from @mbuf_pool as needed, and advance
+ * vq->last_used_idx past every descriptor consumed.
+ *
+ * Returns 0 on success, -1 on a malformed chain, a bad guest address,
+ * or an mbuf allocation failure (the chain may be partially consumed).
+ */
+static inline int
+dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+	     struct vring_desc_1_1 *descs)
+{
+	struct vring_desc_1_1 *desc;
+	uint64_t desc_addr;
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	uint16_t flags;
+	struct rte_mbuf *cur = m, *prev = m;
+	struct virtio_net_hdr *hdr = NULL;
+	uint16_t head_idx = vq->last_used_idx;
+
+	desc = &descs[(head_idx++) & (vq->size - 1)];
+	/* First desc must at least hold the virtio-net header; indirect
+	 * descriptors are not supported on the 1.1 path yet. */
+	if (unlikely((desc->len < dev->vhost_hlen) ||
+		     (desc->flags & VRING_DESC_F_INDIRECT)))
+		return -1;
+
+	desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+	if (unlikely(!desc_addr))
+		return -1;
+
+	if (virtio_net_with_host_offload(dev)) {
+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+		rte_prefetch0(hdr);
+	}
+
+	/*
+	 * A virtio driver normally uses at least 2 desc buffers
+	 * for Tx: the first for storing the header, and others
+	 * for storing the data.
+	 */
+	if (likely((desc->len == dev->vhost_hlen) &&
+		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+		/* Header-only desc: hand it back and move to the data. */
+		desc->flags = 0;
+
+		desc = &descs[(head_idx++) & (vq->size - 1)];
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+			return -1;
+
+		desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+		if (unlikely(!desc_addr))
+			return -1;
+
+		desc_offset = 0;
+		desc_avail = desc->len;
+	} else {
+		/* Header and data share the first descriptor. */
+		desc_avail = desc->len - dev->vhost_hlen;
+		desc_offset = dev->vhost_hlen;
+	}
+
+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+	mbuf_offset = 0;
+	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
+	while (1) {
+		uint64_t hpa;
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+		/*
+		 * A desc buf might across two host physical pages that are
+		 * not continuous. In such case (gpa_to_hpa returns 0), data
+		 * will be copied even though zero copy is enabled.
+		 */
+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+				desc->addr + desc_offset, cpy_len)))) {
+			cur->data_len = cpy_len;
+			cur->data_off = 0;
+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_physaddr = hpa;
+
+			/*
+			 * In zero copy mode, one mbuf can only reference data
+			 * for one or partial of one desc buff.
+			 */
+			mbuf_avail = cpy_len;
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+							   mbuf_offset),
+				(void *)((uintptr_t)(desc_addr + desc_offset)),
+				cpy_len);
+		}
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+
+		/* This desc reaches to its end, get the next one */
+		if (desc_avail == 0) {
+			/*
+			 * Latch the chaining bit BEFORE handing the desc
+			 * back to the driver; clearing flags first would
+			 * wipe VRING_DESC_F_NEXT and truncate every
+			 * multi-descriptor packet after its first buffer.
+			 */
+			flags = desc->flags;
+			desc->flags = 0;
+
+			if ((flags & VRING_DESC_F_NEXT) == 0)
+				break;
+
+			desc = &descs[(head_idx++) & (vq->size - 1)];
+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+				return -1;
+
+			desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+			if (unlikely(!desc_addr))
+				return -1;
+
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+			desc_offset = 0;
+			desc_avail = desc->len;
+
+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+		}
+
+		/*
+		 * This mbuf reaches to its end, get a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(cur == NULL)) {
+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
+					"allocate memory for mbuf.\n");
+				return -1;
+			}
+
+			prev->next = cur;
+			prev->data_len = mbuf_offset;
+			m->nb_segs += 1;
+			m->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+	}
+	/* Chain fully consumed: release the final descriptor too. */
+	desc->flags = 0;
+
+	prev->data_len = mbuf_offset;
+	m->pkt_len    += mbuf_offset;
+
+	if (hdr)
+		vhost_dequeue_offload(hdr, m);
+
+	vq->last_used_idx = head_idx;
+
+	return 0;
+}
+
+/*
+ * Dequeue up to @count packets from a virtio 1.1 (packed) ring into
+ * @pkts, stopping at the first descriptor still owned by the driver,
+ * on mbuf exhaustion, or on a malformed chain.
+ *
+ * Returns the number of mbufs actually stored in @pkts.
+ */
+static inline uint16_t
+vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
+			uint16_t count)
+{
+	uint16_t i;
+	uint16_t idx;
+	struct vring_desc_1_1 *desc = vq->desc_1_1;
+
+	for (i = 0; i < count; i++) {
+		idx = vq->last_used_idx & (vq->size - 1);
+		/* DESC_HW clear: no more descriptors posted by the guest. */
+		if (!(desc[idx].flags & DESC_HW))
+			break;
+
+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(pkts[i] == NULL)) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			break;
+		}
+
+		/*
+		 * Don't hand a half-filled mbuf to the caller: on failure
+		 * free it and stop the burst rather than silently passing
+		 * garbage up (the return value was previously ignored).
+		 */
+		if (unlikely(dequeue_desc(dev, vq, mbuf_pool, pkts[i],
+					  desc) != 0)) {
+			rte_pktmbuf_free(pkts[i]);
+			break;
+		}
+	}
+
+	return i;
+}
+
uint16_t
rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -1000,6 +1178,9 @@ static inline bool __attribute__((always_inline))
if (unlikely(vq->enabled == 0))
return 0;
+ if (dev->features & (1ULL << VIRTIO_F_VERSION_1_1))
+ return vhost_dequeue_burst_1_1(dev, vq, mbuf_pool, pkts, count);
+
if (unlikely(dev->dequeue_zero_copy)) {
struct zcopy_mbuf *zmbuf, *next;
int nr_updated = 0;
--
1.8.3.1
next prev parent reply other threads:[~2017-05-05 13:57 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-05 13:57 [dpdk-dev] [RFC PATCH 00/11] net/virtio: packed ring layout Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 01/11] net/virtio: vring init for 1.1 Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 02/11] net/virtio: implement 1.1 guest Tx Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 03/11] net/virtio-user: add option to enable 1.1 Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 04/11] vhost: enable 1.1 for testing Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 05/11] vhost: set desc addr for 1.1 Jens Freimann
2017-05-05 13:57 ` Jens Freimann [this message]
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 07/11] vhost: mark desc being used Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 08/11] xxx: batch the desc_hw update? Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 09/11] xxx: virtio: remove overheads Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 10/11] vhost: prefetch desc Jens Freimann
2017-05-05 13:57 ` [dpdk-dev] [RFC PATCH 11/11] add virtio 1.1 test guide Jens Freimann
2017-05-08 5:02 ` [dpdk-dev] [RFC PATCH 00/11] net/virtio: packed ring layout Yuanhan Liu
2017-05-08 7:36 ` Jens Freimann
2017-05-17 11:30 ` Jens Freimann
2017-05-18 14:24 ` Yuanhan Liu
2017-05-22 9:14 ` Yuanhan Liu
2017-05-22 9:23 ` Jens Freimann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1493992642-52756-7-git-send-email-jfreiman@redhat.com \
--to=jfreiman@redhat.com \
--cc=dev@dpdk.org \
--cc=yuanhan.liu@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).