From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by dpdk.org (Postfix) with ESMTP id AB785AAC8 for ; Fri, 16 Mar 2018 16:22:12 +0100 (CET) Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 3BDBD4023156; Fri, 16 Mar 2018 15:22:12 +0000 (UTC) Received: from localhost (dhcp-192-241.str.redhat.com [10.33.192.241]) by smtp.corp.redhat.com (Postfix) with ESMTPS id CA4D9C1FE0; Fri, 16 Mar 2018 15:22:11 +0000 (UTC) From: Jens Freimann To: dev@dpdk.org Cc: tiwei.bie@intel.com, yliu@fridaylinux.org, maxime.coquelin@redhat.com, mst@redhat.com Date: Fri, 16 Mar 2018 16:21:15 +0100 Message-Id: <20180316152120.13199-13-jfreimann@redhat.com> In-Reply-To: <20180316152120.13199-1-jfreimann@redhat.com> References: <20180316152120.13199-1-jfreimann@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.11.54.5 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.6]); Fri, 16 Mar 2018 15:22:12 +0000 (UTC) X-Greylist: inspected by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.6]); Fri, 16 Mar 2018 15:22:12 +0000 (UTC) for IP:'10.11.54.5' DOMAIN:'int-mx05.intmail.prod.int.rdu2.redhat.com' HELO:'smtp.corp.redhat.com' FROM:'jfreimann@redhat.com' RCPT:'' Subject: [dpdk-dev] [PATCH 12/17] vhost: dequeue for packed queues X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 16 Mar 2018 15:22:12 -0000 Implement code to dequeue and process descriptors from the vring if VIRTIO_F_PACKED is enabled. Check if descriptor was made available by driver by looking at VIRTIO_F_DESC_AVAIL flag in descriptor. 
If so dequeue and set the used flag VIRTIO_F_DESC_USED to the current
value of the used wrap counter. Used ring wrap counter needs to be
toggled when last descriptor is written out. This allows the host/guest
to detect new descriptors even after the ring has wrapped.

Signed-off-by: Jens Freimann
---
 lib/librte_vhost/vhost.c      |   1 +
 lib/librte_vhost/vhost.h      |   1 +
 lib/librte_vhost/virtio_net.c | 267 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 269 insertions(+)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a300812..8cba10d 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -198,6 +198,7 @@ struct virtio_net *
 
 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+	vq->used_wrap_counter = 1;
 
 	vhost_user_iotlb_init(dev, vring_idx);
 	/* Backends are set to -1 indicating an inactive device. */
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d35c4b1..f77fefe 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -108,6 +108,7 @@ struct vhost_virtqueue {
 
 	struct batch_copy_elem	*batch_copy_elems;
 	uint16_t		batch_copy_nb_elems;
+	uint32_t		used_wrap_counter;
 
 	rte_rwlock_t	iotlb_lock;
 	rte_rwlock_t	iotlb_pending_lock;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 700aca7..8f59e4f 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -19,6 +19,7 @@
 
 #include "iotlb.h"
 #include "vhost.h"
+#include "virtio-1.1.h"
 
 #define MAX_PKT_BURST 32
 
@@ -1118,6 +1119,272 @@
 	}
 }
 
+/*
+ * Copy one packet, i.e. one descriptor chain starting at vq->last_used_idx,
+ * from a packed ring into mbuf 'm'.
+ *
+ * vq->last_used_idx is advanced past every descriptor consumed and the used
+ * wrap counter is toggled whenever the index wraps around the ring. The head
+ * descriptor is marked used only after the whole chain has been copied.
+ * Further mbufs are allocated from 'mbuf_pool' and chained to 'm' when the
+ * data does not fit into a single mbuf.
+ *
+ * Returns 0 on success and -1 on failure. On failure 'm' is not freed here.
+ */
+static inline int
+dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+	struct vring_desc_packed *descs)
+{
+	struct vring_desc_packed *desc;
+	uint64_t desc_addr;
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct rte_mbuf *cur = m, *prev = m;
+	struct virtio_net_hdr *hdr = NULL;
+	uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
+	int wrap_counter = vq->used_wrap_counter;
+	int rc = 0;
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (unlikely(vq->enabled == 0)) {
+		/*
+		 * Nothing is dequeued, so fail instead of handing back an
+		 * empty mbuf. The IOTLB read lock is not held yet.
+		 */
+		rc = -1;
+		goto out_access_unlock;
+	}
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	desc = &descs[vq->last_used_idx & (vq->size - 1)];
+	if (unlikely(desc->len < dev->vhost_hlen)) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"descriptor too short for virtio-net header\n");
+		rc = -1;
+		goto out;
+	}
+
+	if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"INDIRECT not supported yet\n");
+		rc = -1;
+		goto out;
+	}
+
+	desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+		desc->len, VHOST_ACCESS_RO);
+
+	if (unlikely(!desc_addr)) {
+		rc = -1;
+		goto out;
+	}
+
+	if (virtio_net_with_host_offload(dev)) {
+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+		rte_prefetch0(hdr);
+	}
+
+	/*
+	 * A virtio driver normally uses at least 2 desc buffers
+	 * for Tx: the first for storing the header, and others
+	 * for storing the data.
+	 */
+	if (likely((desc->len == dev->vhost_hlen) &&
+		(desc->flags & VRING_DESC_F_NEXT) != 0)) {
+		if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+			toggle_wrap_counter(vq);
+
+		desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"INDIRECT not supported yet\n");
+			rc = -1;
+			goto out;
+		}
+
+		desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+			desc->len, VHOST_ACCESS_RO);
+		if (unlikely(!desc_addr)) {
+			rc = -1;
+			goto out;
+		}
+
+		desc_offset = 0;
+		desc_avail = desc->len;
+	} else {
+		desc_avail = desc->len - dev->vhost_hlen;
+		desc_offset = dev->vhost_hlen;
+	}
+
+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+	mbuf_offset = 0;
+	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
+	while (1) {
+		uint64_t hpa;
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+		/*
+		 * A desc buf might across two host physical pages that are
+		 * not continuous. In such case (gpa_to_hpa returns 0), data
+		 * will be copied even though zero copy is enabled.
+		 */
+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+					desc->addr + desc_offset, cpy_len)))) {
+			cur->data_len = cpy_len;
+			cur->data_off = 0;
+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_physaddr = hpa;
+
+			/*
+			 * In zero copy mode, one mbuf can only reference data
+			 * for one or partial of one desc buff.
+			 */
+			mbuf_avail = cpy_len;
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+							   mbuf_offset),
+				(void *)((uintptr_t)(desc_addr + desc_offset)),
+				cpy_len);
+		}
+
+		mbuf_avail -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail -= cpy_len;
+		desc_offset += cpy_len;
+
+		/* This desc reaches to its end, get the next one */
+		if (desc_avail == 0) {
+			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+				break;
+
+			if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+				toggle_wrap_counter(vq);
+
+			desc = &descs[vq->last_used_idx & (vq->size - 1)];
+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+				RTE_LOG(ERR, VHOST_DATA,
+					"INDIRECT not supported yet\n");
+				rc = -1;
+				goto out;
+			}
+
+			desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+				desc->len, VHOST_ACCESS_RO);
+			if (unlikely(!desc_addr)) {
+				rc = -1;
+				goto out;
+			}
+
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+			desc_offset = 0;
+			desc_avail = desc->len;
+
+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+		}
+
+		/*
+		 * This mbuf reaches to its end, get a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(cur == NULL)) {
+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
+					"allocate memory for mbuf.\n");
+				rc = -1;
+				goto out;
+			}
+
+			prev->next = cur;
+			prev->data_len = mbuf_offset;
+			m->nb_segs += 1;
+			m->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+	}
+
+	if (hdr)
+		vhost_dequeue_offload(hdr, m);
+
+	if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+		toggle_wrap_counter(vq);
+
+	rte_smp_wmb();
+	_set_desc_used(&descs[head_idx], wrap_counter);
+
+	prev->data_len = mbuf_offset;
+	m->pkt_len += mbuf_offset;
+
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return rc;
+}
+
+/*
+ * Dequeue up to 'count' packets (capped at MAX_PKT_BURST) from a packed ring
+ * into 'pkts'. The loop stops at the first descriptor for which
+ * desc_is_avail() is false, or at the first mbuf allocation or dequeue
+ * failure. Returns the number of packets stored in 'pkts'.
+ *
+ * NOTE(review): access_lock is released below but never taken in this
+ * function, while dequeue_desc() takes and releases it for every packet.
+ * Presumably the caller is expected to hold the lock on entry -- confirm
+ * against the call site and remove whichever lock handling is redundant.
+ */
+static inline uint16_t
+vhost_dequeue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
+	uint16_t count)
+{
+	uint16_t i;
+	uint16_t idx;
+	struct vring_desc_packed *desc = vq->desc_packed;
+	int err;
+
+	count = RTE_MIN(MAX_PKT_BURST, count);
+	for (i = 0; i < count; i++) {
+		idx = vq->last_used_idx & (vq->size - 1);
+		if (!desc_is_avail(vq, &desc[idx]))
+			break;
+		rte_smp_rmb();
+
+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(pkts[i] == NULL)) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			break;
+		}
+
+		err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
+		if (unlikely(err)) {
+			rte_pktmbuf_free(pkts[i]);
+			break;
+		}
+	}
+
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return i;
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
-- 
1.8.3.1