From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id A245F37B0 for ; Fri, 5 May 2017 15:57:27 +0200 (CEST) Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id EF00E23E6C5; Fri, 5 May 2017 13:57:26 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com EF00E23E6C5 Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jfreiman@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com EF00E23E6C5 Received: from virtlab417.ml3.eng.bos.redhat.com (virtlab417.ml3.eng.bos.redhat.com [10.19.176.175]) by smtp.corp.redhat.com (Postfix) with ESMTP id A8857785E5; Fri, 5 May 2017 13:57:26 +0000 (UTC) From: Jens Freimann To: yuanhan.liu@linux.intel.com Cc: dev@dpdk.org Date: Fri, 5 May 2017 09:57:17 -0400 Message-Id: <1493992642-52756-7-git-send-email-jfreiman@redhat.com> In-Reply-To: <1493992642-52756-1-git-send-email-jfreiman@redhat.com> References: <1493992642-52756-1-git-send-email-jfreiman@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.12 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.29]); Fri, 05 May 2017 13:57:27 +0000 (UTC) Subject: [dpdk-dev] [RFC PATCH 06/11] vhost: implement virtio 1.1 dequeue path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 05 May 2017 13:57:28 -0000 From: Yuanhan Liu Build test only; haven't tested it yet Signed-off-by: Yuanhan Liu Signed-off-by: Jens Freimann --- lib/librte_vhost/virtio-1.1.h | 23 ++++++ lib/librte_vhost/virtio_net.c | 181 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 lib/librte_vhost/virtio-1.1.h diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h new file mode 100644 index 0000000..4241d0a --- /dev/null +++ b/lib/librte_vhost/virtio-1.1.h @@ -0,0 +1,23 @@ +#ifndef __VIRTIO_1_1_H +#define __VIRTIO_1_1_H + +#define __le64 uint64_t +#define __le32 uint32_t +#define __le16 uint16_t + +#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2 +#define VRING_DESC_F_INDIRECT 4 + +#define BATCH_NOT_FIRST 0x0010 +#define BATCH_NOT_LAST 0x0020 +#define DESC_HW 0x0080 + +struct vring_desc_1_1 { + __le64 addr; + __le32 len; + __le16 index; + __le16 flags; +}; + +#endif /* __VIRTIO_1_1_H */ diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 48219e0..fd6f200 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -46,6 +46,7 @@ #include #include "vhost.h" +#include "virtio-1.1.h" #define MAX_PKT_BURST 32 @@ -973,6 +974,183 @@ static inline bool __attribute__((always_inline)) return true; } +static inline uint16_t +dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, struct rte_mbuf *m, + struct vring_desc_1_1 *descs) +{ + struct vring_desc_1_1 *desc; + uint64_t desc_addr; + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr *hdr = NULL; + uint16_t head_idx = vq->last_used_idx; + + desc = &descs[(head_idx++) & (vq->size - 1)]; + if (unlikely((desc->len < dev->vhost_hlen)) || + (desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + if (virtio_net_with_host_offload(dev)) { + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); + rte_prefetch0(hdr); + } + + /* + * A virtio driver normally uses at least 2 desc buffers + * for Tx: the first for storing the header, and others + * for storing the data. + */ + if (likely((desc->len == dev->vhost_hlen) && + (desc->flags & VRING_DESC_F_NEXT) != 0)) { + desc->flags = 0; + + desc = &descs[(head_idx++) & (vq->size - 1)]; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + desc_offset = 0; + desc_avail = desc->len; + } else { + desc_avail = desc->len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; + } + + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); + + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0); + + mbuf_offset = 0; + mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; + while (1) { + uint64_t hpa; + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + + /* + * A desc buf might across two host physical pages that are + * not continuous. In such case (gpa_to_hpa returns 0), data + * will be copied even though zero copy is enabled. + */ + if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, + desc->addr + desc_offset, cpy_len)))) { + cur->data_len = cpy_len; + cur->data_off = 0; + cur->buf_addr = (void *)(uintptr_t)desc_addr; + cur->buf_physaddr = hpa; + + /* + * In zero copy mode, one mbuf can only reference data + * for one or partial of one desc buff. + */ + mbuf_avail = cpy_len; + } else { + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset), + (void *)((uintptr_t)(desc_addr + desc_offset)), + cpy_len); + } + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + + /* This desc reaches to its end, get the next one */ + if (desc_avail == 0) { + desc->flags = 0; + + if ((desc->flags & VRING_DESC_F_NEXT) == 0) + break; + + desc = &descs[(head_idx++) & (vq->size - 1)]; + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) + return -1; + + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); + if (unlikely(!desc_addr)) + return -1; + + rte_prefetch0((void *)(uintptr_t)desc_addr); + + desc_offset = 0; + desc_avail = desc->len; + + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + } + + /* + * This mbuf reaches to its end, get a new one + * to hold more data. + */ + if (mbuf_avail == 0) { + cur = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(cur == NULL)) { + RTE_LOG(ERR, VHOST_DATA, "Failed to " + "allocate memory for mbuf.\n"); + return -1; + } + + prev->next = cur; + prev->data_len = mbuf_offset; + m->nb_segs += 1; + m->pkt_len += mbuf_offset; + prev = cur; + + mbuf_offset = 0; + mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; + } + } + desc->flags = 0; + + prev->data_len = mbuf_offset; + m->pkt_len += mbuf_offset; + + if (hdr) + vhost_dequeue_offload(hdr, m); + + vq->last_used_idx = head_idx; + + return 0; +} + +static inline uint16_t +vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, + uint16_t count) +{ + uint16_t i; + uint16_t idx; + struct vring_desc_1_1 *desc = vq->desc_1_1; + + for (i = 0; i < count; i++) { + idx = vq->last_used_idx & (vq->size - 1); + if (!(desc[idx].flags & DESC_HW)) + break; + + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(pkts[i] == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + break; + } + + dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc); + } + + return i; +} + uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) @@ -1000,6 +1178,9 @@ static inline bool __attribute__((always_inline)) if (unlikely(vq->enabled == 0)) return 0; + if (dev->features & (1ULL << VIRTIO_F_VERSION_1_1)) + return vhost_dequeue_burst_1_1(dev, vq, mbuf_pool, pkts, count); + if (unlikely(dev->dequeue_zero_copy)) { struct zcopy_mbuf *zmbuf, *next; int nr_updated = 0; -- 1.8.3.1