From: Maxime Coquelin
To: Jens Freimann, dev@dpdk.org
Cc: tiwei.bie@intel.com, yliu@fridaylinux.org, mst@redhat.com
Subject: Re: [dpdk-dev] [PATCH 12/14] vhost: dequeue for packed queues
Date: Thu, 1 Feb 2018 10:35:18 +0100
Message-ID: <76b94d32-9521-35b7-c7a2-9aba217b01ba@redhat.com>
In-Reply-To: <20180129141143.13437-13-jfreimann@redhat.com>
References: <20180129141143.13437-1-jfreimann@redhat.com>
 <20180129141143.13437-13-jfreimann@redhat.com>

On 01/29/2018 03:11 PM, Jens Freimann wrote:
> Implement code to dequeue and process descriptors from
> the vring if VIRTIO_F_PACKED is enabled.
>
> Check if descriptor was made available by driver by looking at
> VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set
> the used flag VIRTIO_F_DESC_USED to the current value of the
> used wrap counter.
>
> Used ring wrap counter needs to be toggled when last descriptor is
> written out. This allows the host/guest to detect new descriptors even
> after the ring has wrapped.
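
Just to make sure we read the new ring layout the same way: a descriptor
is available to dequeue when its AVAIL bit matches the driver's wrap
counter while its USED bit does not, and used_wrap_counter is flipped
each time last_used_idx wraps. Something like the sketch below is what I
have in mind -- the bit values and helper names here are only
placeholders, not the actual desc_is_avail()/_set_desc_used() from
virtio-1.1.h in this series:

#include <stdint.h>

/* Placeholder bits, standing in for whatever virtio-1.1.h defines. */
#define SKETCH_DESC_F_AVAIL	(1 << 7)
#define SKETCH_DESC_F_USED	(1 << 15)

static inline int
sketch_desc_is_avail(uint16_t flags, int wrap_counter)
{
	int avail = !!(flags & SKETCH_DESC_F_AVAIL);
	int used  = !!(flags & SKETCH_DESC_F_USED);

	/* Available iff AVAIL matches the wrap counter and USED does not. */
	return (avail == wrap_counter) && (used != wrap_counter);
}

static inline void
sketch_set_desc_used(uint16_t *flags, int wrap_counter)
{
	/* Mark as used: set both bits equal to the device's wrap counter. */
	uint16_t f = *flags & (uint16_t)~(SKETCH_DESC_F_AVAIL | SKETCH_DESC_F_USED);

	if (wrap_counter)
		f |= SKETCH_DESC_F_AVAIL | SKETCH_DESC_F_USED;
	*flags = f;
}

If that matches your intent, good; if not, it would be worth spelling
the protocol out in the commit message.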
>
> Signed-off-by: Jens Freimann
> ---
>  lib/librte_vhost/vhost.c      |   1 +
>  lib/librte_vhost/vhost.h      |   1 +
>  lib/librte_vhost/virtio_net.c | 194 ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 196 insertions(+)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 78913912c..e5f58d9c8 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -191,6 +191,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>
>  	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
>  	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
> +	vq->used_wrap_counter = 1;
>
>  	vhost_user_iotlb_init(dev, vring_idx);
>  	/* Backends are set to -1 indicating an inactive device. */
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 8554d51d8..a3d4214b6 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -106,6 +106,7 @@ struct vhost_virtqueue {
>
>  	struct batch_copy_elem	*batch_copy_elems;
>  	uint16_t		batch_copy_nb_elems;
> +	uint32_t		used_wrap_counter;
>
>  	rte_rwlock_t	iotlb_lock;
>  	rte_rwlock_t	iotlb_pending_lock;
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index edfab3ba6..5d4cfe8cc 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -19,6 +19,7 @@
>
>  #include "iotlb.h"
>  #include "vhost.h"
> +#include "virtio-1.1.h"
>
>  #define MAX_PKT_BURST 32
>
> @@ -1111,6 +1112,199 @@ restore_mbuf(struct rte_mbuf *m)
>  	}
>  }
>
> +static inline uint16_t
> +dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
> +	     struct vring_desc_1_1 *descs)
> +{
> +	struct vring_desc_1_1 *desc;
> +	uint64_t desc_addr;
> +	uint32_t desc_avail, desc_offset;
> +	uint32_t mbuf_avail, mbuf_offset;
> +	uint32_t cpy_len;
> +	struct rte_mbuf *cur = m, *prev = m;
> +	struct virtio_net_hdr *hdr = NULL;
> +	uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
> +	int wrap_counter = vq->used_wrap_counter;
> +
> +	desc = &descs[vq->last_used_idx & (vq->size - 1)];
> +	if (unlikely((desc->len < dev->vhost_hlen)) ||
> +			(desc->flags & VRING_DESC_F_INDIRECT))
> +		rte_panic("INDIRECT not supported yet");

Using rte_panic() may not be a good idea here, because a malicious guest
could easily make the vswitch crash.

> +
> +	desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);

You should use vhost_iova_to_vva() here and everywhere else, otherwise
you break IOMMU support.
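
To be concrete, I would expect the head descriptor handling to look
something like the fragment below. This is only a sketch against this
patch's context: I am assuming vhost_iova_to_vva() keeps the
(dev, vq, iova, size, perm) arguments it has in the split-ring dequeue
path, and the log message is just illustrative:

	if (unlikely((desc->len < dev->vhost_hlen) ||
			(desc->flags & VRING_DESC_F_INDIRECT))) {
		/* Fail this request instead of taking the whole vswitch
		 * down with rte_panic(). */
		RTE_LOG(ERR, VHOST_DATA,
			"(%d) unsupported descriptor: indirect or too short\n",
			dev->vid);
		return -1;
	}

	/* IOTLB-aware translation, so VIRTIO_F_IOMMU_PLATFORM keeps working. */
	desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
			desc->len, VHOST_ACCESS_RO);
	if (unlikely(!desc_addr))
		return -1;

The same remark applies to the other rte_panic() and
rte_vhost_gpa_to_vva() calls below.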
> + */ > + if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, > + desc->addr + desc_offset, cpy_len)))) { > + cur->data_len = cpy_len; > + cur->data_off = 0; > + cur->buf_addr = (void *)(uintptr_t)desc_addr; > + cur->buf_physaddr = hpa; > + > + /* > + * In zero copy mode, one mbuf can only reference data > + * for one or partial of one desc buff. > + */ > + mbuf_avail = cpy_len; > + } else { > + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, > + mbuf_offset), > + (void *)((uintptr_t)(desc_addr + desc_offset)), > + cpy_len); > + } > + > + mbuf_avail -= cpy_len; > + mbuf_offset += cpy_len; > + desc_avail -= cpy_len; > + desc_offset += cpy_len; > + > + /* This desc reaches to its end, get the next one */ > + if (desc_avail == 0) { > + if ((desc->flags & VRING_DESC_F_NEXT) == 0) > + break; > + > + if ((++vq->last_used_idx & (vq->size - 1)) == 0) > + toggle_wrap_counter(vq); > + > + desc = &descs[vq->last_used_idx & (vq->size - 1)]; > + if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) > + rte_panic("INDIRECT not supported yet"); > + > + desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); > + if (unlikely(!desc_addr)) > + return -1; > + > + rte_prefetch0((void *)(uintptr_t)desc_addr); > + > + desc_offset = 0; > + desc_avail = desc->len; > + > + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); > + } > + > + /* > + * This mbuf reaches to its end, get a new one > + * to hold more data. > + */ > + if (mbuf_avail == 0) { > + cur = rte_pktmbuf_alloc(mbuf_pool); > + if (unlikely(cur == NULL)) { > + RTE_LOG(ERR, VHOST_DATA, "Failed to " > + "allocate memory for mbuf.\n"); > + return -1; > + } > + > + prev->next = cur; > + prev->data_len = mbuf_offset; > + m->nb_segs += 1; > + m->pkt_len += mbuf_offset; > + prev = cur; > + > + mbuf_offset = 0; > + mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; > + } > + } > + > + if (hdr) > + vhost_dequeue_offload(hdr, m); > + > + if ((++vq->last_used_idx & (vq->size - 1)) == 0) > + toggle_wrap_counter(vq); > + > + rte_smp_wmb(); > + _set_desc_used(&descs[head_idx], wrap_counter); > + > + prev->data_len = mbuf_offset; > + m->pkt_len += mbuf_offset; > + > + return 0; > +} > + > +static inline uint16_t > +vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq, > + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, > + uint16_t count) > +{ > + uint16_t i; > + uint16_t idx; > + struct vring_desc_1_1 *desc = vq->desc_1_1; > + int err; > + > + count = RTE_MIN(MAX_PKT_BURST, count); > + for (i = 0; i < count; i++) { > + idx = vq->last_used_idx & (vq->size - 1); > + if (!desc_is_avail(vq, &desc[idx])) > + break; > + rte_smp_rmb(); > + > + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); > + if (unlikely(pkts[i] == NULL)) { > + RTE_LOG(ERR, VHOST_DATA, > + "Failed to allocate memory for mbuf.\n"); > + break; > + } > + > + err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc); > + if (unlikely(err)) { > + rte_pktmbuf_free(pkts[i]); > + break; > + } > + } > + > + rte_spinlock_unlock(&vq->access_lock); Where is it locked? It looks unbalanced. > + > + return i; > +} > + > uint16_t > rte_vhost_dequeue_burst(int vid, uint16_t queue_id, > struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) >