From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id C01462C72 for ; Wed, 21 Jun 2017 04:59:29 +0200 (CEST) Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga105.jf.intel.com with ESMTP; 20 Jun 2017 19:59:29 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.39,367,1493708400"; d="scan'208";a="101830290" Received: from dpdk25.sh.intel.com ([10.67.111.75]) by orsmga002.jf.intel.com with ESMTP; 20 Jun 2017 19:59:28 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: Yuanhan Liu , Jens Freimann Date: Wed, 21 Jun 2017 10:57:42 +0800 Message-Id: <1498013885-102779-7-git-send-email-tiwei.bie@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1498013885-102779-1-git-send-email-tiwei.bie@intel.com> References: <1498013885-102779-1-git-send-email-tiwei.bie@intel.com> Subject: [dpdk-dev] [RFC 06/29] vhost: implement virtio 1.1 dequeue path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Jun 2017 02:59:30 -0000 From: Yuanhan Liu Build test only; haven't tested it yet Signed-off-by: Yuanhan Liu Signed-off-by: Jens Freimann --- lib/librte_vhost/virtio-1.1.h | 23 ++++++ lib/librte_vhost/virtio_net.c | 181 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 lib/librte_vhost/virtio-1.1.h diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h new file mode 100644 index 0000000..4241d0a --- /dev/null +++ b/lib/librte_vhost/virtio-1.1.h @@ -0,0 +1,23 @@ +#ifndef __VIRTIO_1_1_H +#define __VIRTIO_1_1_H + +#define __le64 uint64_t +#define __le32 uint32_t +#define __le16 uint16_t + +#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2 +#define VRING_DESC_F_INDIRECT 4 + +#define BATCH_NOT_FIRST 0x0010 +#define BATCH_NOT_LAST 0x0020 +#define DESC_HW 0x0080 + +struct vring_desc_1_1 { + __le64 addr; + __le32 len; + __le16 index; + __le16 flags; +}; + +#endif /* __VIRTIO_1_1_H */ diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 48219e0..fd6f200 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -46,6 +46,7 @@ #include #include "vhost.h" +#include "virtio-1.1.h" #define MAX_PKT_BURST 32 @@ -973,6 +974,183 @@ mbuf_is_consumed(struct rte_mbuf *m) return true; } +static inline uint16_t +dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, struct rte_mbuf *m, + struct vring_desc_1_1 *descs) +{ + struct vring_desc_1_1 *desc; + uint64_t desc_addr; + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr *hdr = NULL; + uint16_t head_idx = vq->last_used_idx; + + desc = &descs[(head_idx++) & (vq->size - 1)]; + if (unlikely((desc->len < dev->vhost_hlen)) || + (desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + if (virtio_net_with_host_offload(dev)) { + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); + rte_prefetch0(hdr); + } + + /* + * A virtio driver normally uses at least 2 desc buffers + * for Tx: the first for storing the header, and others + * for storing the data. + */ + if (likely((desc->len == dev->vhost_hlen) && + (desc->flags & VRING_DESC_F_NEXT) != 0)) { + desc->flags = 0; + + desc = &descs[(head_idx++) & (vq->size - 1)]; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + desc_offset = 0; + desc_avail = desc->len; + } else { + desc_avail = desc->len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; + } + + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); + + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0); + + mbuf_offset = 0; + mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; + while (1) { + uint64_t hpa; + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + + /* + * A desc buf might across two host physical pages that are + * not continuous. In such case (gpa_to_hpa returns 0), data + * will be copied even though zero copy is enabled. + */ + if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, + desc->addr + desc_offset, cpy_len)))) { + cur->data_len = cpy_len; + cur->data_off = 0; + cur->buf_addr = (void *)(uintptr_t)desc_addr; + cur->buf_physaddr = hpa; + + /* + * In zero copy mode, one mbuf can only reference data + * for one or partial of one desc buff. + */ + mbuf_avail = cpy_len; + } else { + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset), + (void *)((uintptr_t)(desc_addr + desc_offset)), + cpy_len); + } + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + + /* This desc reaches to its end, get the next one */ + if (desc_avail == 0) { + desc->flags = 0; + + if ((desc->flags & VRING_DESC_F_NEXT) == 0) + break; + + desc = &descs[(head_idx++) & (vq->size - 1)]; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + rte_prefetch0((void *)(uintptr_t)desc_addr); + + desc_offset = 0; + desc_avail = desc->len; + + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + } + + /* + * This mbuf reaches to its end, get a new one + * to hold more data. + */ + if (mbuf_avail == 0) { + cur = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(cur == NULL)) { + RTE_LOG(ERR, VHOST_DATA, "Failed to " + "allocate memory for mbuf.\n"); + return -1; + } + + prev->next = cur; + prev->data_len = mbuf_offset; + m->nb_segs += 1; + m->pkt_len += mbuf_offset; + prev = cur; + + mbuf_offset = 0; + mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; + } + } + desc->flags = 0; + + prev->data_len = mbuf_offset; + m->pkt_len += mbuf_offset; + + if (hdr) + vhost_dequeue_offload(hdr, m); + + vq->last_used_idx = head_idx; + + return 0; +} + +static inline uint16_t +vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, + uint16_t count) +{ + uint16_t i; + uint16_t idx; + struct vring_desc_1_1 *desc = vq->desc_1_1; + + for (i = 0; i < count; i++) { + idx = vq->last_used_idx & (vq->size - 1); + if (!(desc[idx].flags & DESC_HW)) + break; + + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(pkts[i] == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + break; + } + + dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc); + } + + return i; +} + uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) @@ -1000,6 +1178,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, if (unlikely(vq->enabled == 0)) return 0; + if (dev->features & (1ULL << VIRTIO_F_VERSION_1_1)) + return vhost_dequeue_burst_1_1(dev, vq, mbuf_pool, pkts, count); + if (unlikely(dev->dequeue_zero_copy)) { struct zcopy_mbuf *zmbuf, *next; int nr_updated = 0; -- 2.7.4