From: Jens Freimann
To: Maxime Coquelin
Cc: dev@dpdk.org, tiwei.bie@intel.com, yliu@fridaylinux.org, mst@redhat.com
Date: Thu, 1 Feb 2018 11:23:44 +0100
Message-ID: <20180201102344.idetme4qwnhyyxkt@localhost.localdomain>
References: <20180129141143.13437-1-jfreimann@redhat.com> <20180129141143.13437-13-jfreimann@redhat.com> <76b94d32-9521-35b7-c7a2-9aba217b01ba@redhat.com>
In-Reply-To: <76b94d32-9521-35b7-c7a2-9aba217b01ba@redhat.com>
Subject: Re: [dpdk-dev] [PATCH 12/14] vhost: dequeue for packed queues

On Thu, Feb 01, 2018 at 10:35:18AM +0100, Maxime Coquelin wrote:
>
>
>On 01/29/2018 03:11 PM, Jens Freimann wrote:
>>Implement code to dequeue and process descriptors from
>>the vring if VIRTIO_F_PACKED is enabled.
>>
>>Check whether a descriptor was made available by the driver by looking
>>at the VIRTIO_F_DESC_AVAIL flag in the descriptor. If so, dequeue it and
>>set the used flag VIRTIO_F_DESC_USED to the current value of the used
>>wrap counter.
>>
>>The used ring wrap counter needs to be toggled when the last descriptor
>>is written out. This allows the host/guest to detect new descriptors
>>even after the ring has wrapped.
>>
>>Signed-off-by: Jens Freimann
>>---
>> lib/librte_vhost/vhost.c      |   1 +
>> lib/librte_vhost/vhost.h      |   1 +
>> lib/librte_vhost/virtio_net.c | 194 ++++++++++++++++++++++++++++++++++++++++++
>> 3 files changed, 196 insertions(+)
>>
>>diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>index 78913912c..e5f58d9c8 100644
>>--- a/lib/librte_vhost/vhost.c
>>+++ b/lib/librte_vhost/vhost.c
>>@@ -191,6 +191,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>> 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
>> 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
>>+	vq->used_wrap_counter = 1;
>> 	vhost_user_iotlb_init(dev, vring_idx);
>> 	/* Backends are set to -1 indicating an inactive device. */
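
To make the flag handling described in the commit message easier to follow: the
driver hands a descriptor to the device by flipping its AVAIL/USED flags relative
to a wrap counter, and the device hands it back by writing both flags to its own
current wrap counter. Below is a minimal sketch of the helpers this relies on
(desc_is_avail(), _set_desc_used() and the wrap-counter toggle). The descriptor
layout, flag bit positions and the simplified signatures are assumptions based on
the VIRTIO 1.1 packed-ring draft, not necessarily what virtio-1.1.h in this
series defines.

	#include <stdint.h>

	/* Assumed packed-ring descriptor layout; virtio-1.1.h may differ. */
	struct vring_desc_1_1 {
		uint64_t addr;
		uint32_t len;
		uint16_t index;
		uint16_t flags;
	};

	/* Flag bit positions taken from the VIRTIO 1.1 draft. */
	#define VIRTIO_F_DESC_AVAIL	(1 << 7)
	#define VIRTIO_F_DESC_USED	(1 << 15)

	/* A descriptor is available to the device when its AVAIL bit matches
	 * the wrap counter we expect and its USED bit does not. */
	static inline int
	desc_is_avail(const struct vring_desc_1_1 *desc, int wrap_counter)
	{
		return !!(desc->flags & VIRTIO_F_DESC_AVAIL) == wrap_counter &&
		       !!(desc->flags & VIRTIO_F_DESC_USED) != wrap_counter;
	}

	/* Hand a descriptor back to the driver: per the draft, both bits are
	 * written to the device's current wrap counter, so the driver then
	 * sees AVAIL == USED for this slot. */
	static inline void
	_set_desc_used(struct vring_desc_1_1 *desc, int wrap_counter)
	{
		uint16_t flags = desc->flags;

		if (wrap_counter)
			flags |= VIRTIO_F_DESC_AVAIL | VIRTIO_F_DESC_USED;
		else
			flags &= ~(VIRTIO_F_DESC_AVAIL | VIRTIO_F_DESC_USED);

		desc->flags = flags;
	}

	/* The wrap counter flips every time last_used_idx wraps around the
	 * ring, which is what lets both sides tell new descriptors from
	 * stale ones. */
	static inline void
	toggle_wrap_counter(uint32_t *wrap_counter)
	{
		*wrap_counter ^= 1;
	}

The dequeue path quoted below then boils down to: check desc_is_avail() against
the current wrap counter, walk the descriptor chain, toggle the counter whenever
last_used_idx wraps, and finally mark the head descriptor used behind a write
barrier.
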
>>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>index 8554d51d8..a3d4214b6 100644
>>--- a/lib/librte_vhost/vhost.h
>>+++ b/lib/librte_vhost/vhost.h
>>@@ -106,6 +106,7 @@ struct vhost_virtqueue {
>> 	struct batch_copy_elem	*batch_copy_elems;
>> 	uint16_t		batch_copy_nb_elems;
>>+	uint32_t		used_wrap_counter;
>> 	rte_rwlock_t	iotlb_lock;
>> 	rte_rwlock_t	iotlb_pending_lock;
>>diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
>>index edfab3ba6..5d4cfe8cc 100644
>>--- a/lib/librte_vhost/virtio_net.c
>>+++ b/lib/librte_vhost/virtio_net.c
>>@@ -19,6 +19,7 @@
>> #include "iotlb.h"
>> #include "vhost.h"
>>+#include "virtio-1.1.h"
>> #define MAX_PKT_BURST 32
>>@@ -1111,6 +1112,199 @@ restore_mbuf(struct rte_mbuf *m)
>> 	}
>> }
>>+static inline uint16_t
>>+dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>+	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
>>+	     struct vring_desc_1_1 *descs)
>>+{
>>+	struct vring_desc_1_1 *desc;
>>+	uint64_t desc_addr;
>>+	uint32_t desc_avail, desc_offset;
>>+	uint32_t mbuf_avail, mbuf_offset;
>>+	uint32_t cpy_len;
>>+	struct rte_mbuf *cur = m, *prev = m;
>>+	struct virtio_net_hdr *hdr = NULL;
>>+	uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
>>+	int wrap_counter = vq->used_wrap_counter;
>>+
>>+	desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+	if (unlikely((desc->len < dev->vhost_hlen)) ||
>>+			(desc->flags & VRING_DESC_F_INDIRECT))
>>+		rte_panic("INDIRECT not supported yet");
>
>Using rte_panic() may not be a good idea here, because a malicious guest
>could make the vswitch crash easily.

Good point. It was for debugging only, I will remove it.

>>+
>>+	desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
>
>You should use vhost_iova_to_vva() here and everywhere else, otherwise
>you break IOMMU support.

Yes, I'll change it.
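
For the record, the change at this call site would look roughly like the sketch
below. I'm assuming the vhost_iova_to_vva() prototype used by the split-ring code
in virtio_net.c (dev, vq, the IOVA, the access size and a permission flag), so
treat the exact signature and the VHOST_ACCESS_RO flag as unverified here:

	/* Translate through the IOTLB-aware helper instead of assuming a
	 * guest-physical address, so vhost-user IOMMU setups keep working.
	 * Signature assumed, not checked against this tree. */
	desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
					desc->len, VHOST_ACCESS_RO);
	if (unlikely(!desc_addr))
		return -1;

The same substitution would apply to the later rte_vhost_gpa_to_vva() calls in
this function.
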
>>+	if (unlikely(!desc_addr))
>>+		return -1;
>>+
>>+	if (virtio_net_with_host_offload(dev)) {
>>+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>>+		rte_prefetch0(hdr);
>>+	}
>>+
>>+	/*
>>+	 * A virtio driver normally uses at least 2 desc buffers
>>+	 * for Tx: the first for storing the header, and others
>>+	 * for storing the data.
>>+	 */
>>+	if (likely((desc->len == dev->vhost_hlen) &&
>>+		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
>>+		if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+			toggle_wrap_counter(vq);
>>+
>>+		desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+
>>+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
>>+			rte_panic("INDIRECT not supported yet");
>Ditto.
>
>>+
>>+		desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
>>+		if (unlikely(!desc_addr))
>>+			return -1;
>>+
>>+		desc_offset = 0;
>>+		desc_avail = desc->len;
>>+	} else {
>>+		desc_avail = desc->len - dev->vhost_hlen;
>>+		desc_offset = dev->vhost_hlen;
>>+	}
>>+
>>+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
>>+
>>+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
>>+
>>+	mbuf_offset = 0;
>>+	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
>>+	while (1) {
>>+		uint64_t hpa;
>>+
>>+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
>>+
>>+		/*
>>+		 * A desc buf might across two host physical pages that are
>>+		 * not continuous. In such case (gpa_to_hpa returns 0), data
>>+		 * will be copied even though zero copy is enabled.
>>+		 */
>>+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
>>+					desc->addr + desc_offset, cpy_len)))) {
>>+			cur->data_len = cpy_len;
>>+			cur->data_off = 0;
>>+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
>>+			cur->buf_physaddr = hpa;
>>+
>>+			/*
>>+			 * In zero copy mode, one mbuf can only reference data
>>+			 * for one or partial of one desc buff.
>>+			 */
>>+			mbuf_avail = cpy_len;
>>+		} else {
>>+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
>>+							   mbuf_offset),
>>+				(void *)((uintptr_t)(desc_addr + desc_offset)),
>>+				cpy_len);
>>+		}
>>+
>>+		mbuf_avail  -= cpy_len;
>>+		mbuf_offset += cpy_len;
>>+		desc_avail  -= cpy_len;
>>+		desc_offset += cpy_len;
>>+
>>+		/* This desc reaches to its end, get the next one */
>>+		if (desc_avail == 0) {
>>+			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
>>+				break;
>>+
>>+			if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+				toggle_wrap_counter(vq);
>>+
>>+			desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
>>+				rte_panic("INDIRECT not supported yet");
>>+
>>+			desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
>>+			if (unlikely(!desc_addr))
>>+				return -1;
>>+
>>+			rte_prefetch0((void *)(uintptr_t)desc_addr);
>>+
>>+			desc_offset = 0;
>>+			desc_avail  = desc->len;
>>+
>>+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>>+		}
>>+
>>+		/*
>>+		 * This mbuf reaches to its end, get a new one
>>+		 * to hold more data.
>>+		 */
>>+		if (mbuf_avail == 0) {
>>+			cur = rte_pktmbuf_alloc(mbuf_pool);
>>+			if (unlikely(cur == NULL)) {
>>+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
>>+					"allocate memory for mbuf.\n");
>>+				return -1;
>>+			}
>>+
>>+			prev->next = cur;
>>+			prev->data_len = mbuf_offset;
>>+			m->nb_segs += 1;
>>+			m->pkt_len += mbuf_offset;
>>+			prev = cur;
>>+
>>+			mbuf_offset = 0;
>>+			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>>+		}
>>+	}
>>+
>>+	if (hdr)
>>+		vhost_dequeue_offload(hdr, m);
>>+
>>+	if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+		toggle_wrap_counter(vq);
>>+
>>+	rte_smp_wmb();
>>+	_set_desc_used(&descs[head_idx], wrap_counter);
>>+
>>+	prev->data_len = mbuf_offset;
>>+	m->pkt_len += mbuf_offset;
>>+
>>+	return 0;
>>+}
>>+
>>+static inline uint16_t
>>+vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
>>+	uint16_t count)
>>+{
>>+	uint16_t i;
>>+	uint16_t idx;
>>+	struct vring_desc_1_1 *desc = vq->desc_1_1;
>>+	int err;
>>+
>>+	count = RTE_MIN(MAX_PKT_BURST, count);
>>+	for (i = 0; i < count; i++) {
>>+		idx = vq->last_used_idx & (vq->size - 1);
>>+		if (!desc_is_avail(vq, &desc[idx]))
>>+			break;
>>+		rte_smp_rmb();
>>+
>>+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
>>+		if (unlikely(pkts[i] == NULL)) {
>>+			RTE_LOG(ERR, VHOST_DATA,
>>+				"Failed to allocate memory for mbuf.\n");
>>+			break;
>>+		}
>>+
>>+		err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
>>+		if (unlikely(err)) {
>>+			rte_pktmbuf_free(pkts[i]);
>>+			break;
>>+		}
>>+	}
>>+
>>+	rte_spinlock_unlock(&vq->access_lock);
>
>Where is it locked? It looks unbalanced.

It is locked in the caller, rte_vhost_dequeue_burst(), and we return
immediately after this function. But I could change it to lock and unlock
right here.

Thanks for the review!

regards,
Jens
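
P.S. To make the "lock and unlock right here" option concrete, here is a rough
sketch. It assumes the caller would then no longer take vq->access_lock around
the packed-queue path, and the function tail (returning the burst count) is my
assumption since the quoted hunk ends at the unlock:

	static inline uint16_t
	vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
		uint16_t count)
	{
		uint16_t i;
		uint16_t idx;
		struct vring_desc_1_1 *desc = vq->desc_1_1;
		int err;

		/* Take and release the lock in the same function, so the
		 * locking is balanced and easier to review. */
		rte_spinlock_lock(&vq->access_lock);

		count = RTE_MIN(MAX_PKT_BURST, count);
		for (i = 0; i < count; i++) {
			idx = vq->last_used_idx & (vq->size - 1);
			if (!desc_is_avail(vq, &desc[idx]))
				break;
			rte_smp_rmb();

			pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
			if (unlikely(pkts[i] == NULL)) {
				RTE_LOG(ERR, VHOST_DATA,
					"Failed to allocate memory for mbuf.\n");
				break;
			}

			err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
			if (unlikely(err)) {
				rte_pktmbuf_free(pkts[i]);
				break;
			}
		}

		rte_spinlock_unlock(&vq->access_lock);

		/* Assumed tail: report how many packets were dequeued. */
		return i;
	}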