From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by dpdk.org (Postfix) with ESMTP id 4843E2A66 for ; Fri, 6 Apr 2018 12:07:35 +0200 (CEST) Received: from smtp.corp.redhat.com (int-mx03.intmail.prod.int.rdu2.redhat.com [10.11.54.3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id DD19B40704BE; Fri, 6 Apr 2018 10:07:34 +0000 (UTC) Received: from localhost (dhcp-192-241.str.redhat.com [10.33.192.241]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 0999A10B009B; Fri, 6 Apr 2018 10:07:27 +0000 (UTC) Date: Fri, 6 Apr 2018 12:07:26 +0200 From: Jens Freimann To: Maxime Coquelin Cc: dev@dpdk.org, tiwei.bie@intel.com, yliu@fridaylinux.org, mst@redhat.com Message-ID: <20180406100726.sfgirc43ewivfrgc@dhcp-192-241.str.redhat.com> References: <20180405101031.26468-1-jfreimann@redhat.com> <20180405101031.26468-15-jfreimann@redhat.com> <222c933c-831d-c562-1e21-df7e788bb783@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii; format=flowed Content-Disposition: inline In-Reply-To: <222c933c-831d-c562-1e21-df7e788bb783@redhat.com> User-Agent: NeoMutt/20180223 X-Scanned-By: MIMEDefang 2.78 on 10.11.54.3 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.7]); Fri, 06 Apr 2018 10:07:34 +0000 (UTC) X-Greylist: inspected by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.7]); Fri, 06 Apr 2018 10:07:34 +0000 (UTC) for IP:'10.11.54.3' DOMAIN:'int-mx03.intmail.prod.int.rdu2.redhat.com' HELO:'smtp.corp.redhat.com' FROM:'jfreimann@redhat.com' RCPT:'' Subject: Re: [dpdk-dev] [PATCH v3 14/21] vhost: dequeue for packed queues X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 06 Apr 2018 10:07:35 -0000 On Fri, Apr 06, 2018 at 11:30:10AM +0200, Maxime Coquelin wrote: > > >On 04/05/2018 12:10 PM, Jens Freimann wrote: >>Implement code to dequeue and process descriptors from >>the vring if VIRTIO_F_RING_PACKED is enabled. >> >>Check if descriptor was made available by driver by looking at >>VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set >>the used flag VIRTIO_F_DESC_USED to the current value of the >>used wrap counter. >> >>Used ring wrap counter needs to be toggled when last descriptor is >>written out. This allows the host/guest to detect new descriptors even >>after the ring has wrapped. >> >>Signed-off-by: Jens Freimann >>--- >> lib/librte_vhost/vhost.c | 1 + >> lib/librte_vhost/vhost.h | 1 + >> lib/librte_vhost/virtio_net.c | 228 ++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 230 insertions(+) >> >>diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c >>index 1f17cdd75..eb5a98875 100644 >>--- a/lib/librte_vhost/vhost.c >>+++ b/lib/librte_vhost/vhost.c >>@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) >> vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; >> vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; >>+ vq->used_wrap_counter = 1; >> vhost_user_iotlb_init(dev, vring_idx); >> /* Backends are set to -1 indicating an inactive device. */ >>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h >>index 20d78f883..c8aa946fd 100644 >>--- a/lib/librte_vhost/vhost.h >>+++ b/lib/librte_vhost/vhost.h >>@@ -112,6 +112,7 @@ struct vhost_virtqueue { >> struct batch_copy_elem *batch_copy_elems; >> uint16_t batch_copy_nb_elems; >>+ uint16_t used_wrap_counter; >> rte_rwlock_t iotlb_lock; >> rte_rwlock_t iotlb_pending_lock; >>diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c >>index ed7198dbb..7eea1da04 100644 >>--- a/lib/librte_vhost/virtio_net.c >>+++ b/lib/librte_vhost/virtio_net.c >>@@ -19,6 +19,7 @@ >> #include "iotlb.h" >> #include "vhost.h" >>+#include "virtio-1.1.h" >> #define MAX_PKT_BURST 32 >>@@ -1118,6 +1119,233 @@ restore_mbuf(struct rte_mbuf *m) >> } >> } >>+static inline uint16_t >>+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, >>+ struct rte_mempool *mbuf_pool, struct rte_mbuf *m, >>+ struct vring_desc_packed *descs) >>+{ >>+ struct vring_desc_packed *desc; >>+ uint64_t desc_addr; >>+ uint32_t desc_avail, desc_offset; >>+ uint32_t mbuf_avail, mbuf_offset; >>+ uint32_t cpy_len; >>+ struct rte_mbuf *cur = m, *prev = m; >>+ struct virtio_net_hdr *hdr = NULL; >>+ uint16_t head_idx = vq->last_used_idx & (vq->size - 1); >>+ int wrap_counter = vq->used_wrap_counter; >>+ int rc = 0; >>+ >>+ rte_spinlock_lock(&vq->access_lock); >>+ >>+ if (unlikely(vq->enabled == 0)) >>+ goto out; > >It is unbalanced as it would unlock iotlb_rd_lock that isn't locked yet. yes, will fix > >>+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) >>+ vhost_user_iotlb_rd_lock(vq); >>+ >>+ desc = &descs[vq->last_used_idx & (vq->size - 1)]; >>+ if (unlikely((desc->len < dev->vhost_hlen)) || >>+ (desc->flags & VRING_DESC_F_INDIRECT)) { >>+ RTE_LOG(ERR, VHOST_DATA, >>+ "INDIRECT not supported yet\n"); >>+ rc = -1; >>+ goto out; >>+ } >>+ >>+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, >>+ sizeof(*desc), VHOST_ACCESS_RO); >>+ >>+ if (unlikely(!desc_addr)) { >>+ rc = -1; >>+ goto out; >>+ } >>+ >>+ if (virtio_net_with_host_offload(dev)) { >>+ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); >>+ rte_prefetch0(hdr); >>+ } >>+ >>+ /* >>+ * A virtio driver normally uses at least 2 desc buffers >>+ * for Tx: the first for storing the header, and others >>+ * for storing the data. >>+ */ >>+ if (likely((desc->len == dev->vhost_hlen) && >>+ (desc->flags & VRING_DESC_F_NEXT) != 0)) { >>+ if ((++vq->last_used_idx & (vq->size - 1)) == 0) >>+ toggle_wrap_counter(vq); >>+ >>+ desc = &descs[vq->last_used_idx & (vq->size - 1)]; >>+ >>+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { >>+ RTE_LOG(ERR, VHOST_DATA, >>+ "INDIRECT not supported yet\n"); >>+ rc = -1; >>+ goto out; >>+ } >>+ >>+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, >>+ sizeof(*desc), VHOST_ACCESS_RO); >>+ if (unlikely(!desc_addr)) { >>+ rc = -1; >>+ goto out; >>+ } >>+ >>+ desc_offset = 0; >>+ desc_avail = desc->len; >>+ } else { >>+ desc_avail = desc->len - dev->vhost_hlen; >>+ desc_offset = dev->vhost_hlen; >>+ } >>+ >>+ rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); >>+ >>+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0); >>+ >>+ mbuf_offset = 0; >>+ mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; >>+ while (1) { >>+ uint64_t hpa; >>+ >>+ cpy_len = RTE_MIN(desc_avail, mbuf_avail); >>+ >>+ /* >>+ * A desc buf might across two host physical pages that are >>+ * not continuous. In such case (gpa_to_hpa returns 0), data >>+ * will be copied even though zero copy is enabled. >>+ */ >>+ if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, >>+ desc->addr + desc_offset, cpy_len)))) { >>+ cur->data_len = cpy_len; >>+ cur->data_off = 0; >>+ cur->buf_addr = (void *)(uintptr_t)desc_addr; >>+ cur->buf_physaddr = hpa; >>+ >>+ /* >>+ * In zero copy mode, one mbuf can only reference data >>+ * for one or partial of one desc buff. >>+ */ >>+ mbuf_avail = cpy_len; >>+ } else { >>+ rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, >>+ mbuf_offset), >>+ (void *)((uintptr_t)(desc_addr + desc_offset)), >>+ cpy_len); >>+ } >>+ >>+ mbuf_avail -= cpy_len; >>+ mbuf_offset += cpy_len; >>+ desc_avail -= cpy_len; >>+ desc_offset += cpy_len; >>+ >>+ /* This desc reaches to its end, get the next one */ >>+ if (desc_avail == 0) { >>+ if ((desc->flags & VRING_DESC_F_NEXT) == 0) >>+ break; >>+ >>+ if ((++vq->last_used_idx & (vq->size - 1)) == 0) >>+ toggle_wrap_counter(vq); >>+ >>+ desc = &descs[vq->last_used_idx & (vq->size - 1)]; >>+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { >>+ RTE_LOG(ERR, VHOST_DATA, "INDIRECT not supported yet"); >>+ } >>+ >>+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, >>+ sizeof(*desc), VHOST_ACCESS_RO); >>+ if (unlikely(!desc_addr)) { >>+ rc = -1; >>+ goto out; >Not sure, but, shouldn't you break here instead so rings gets updated if >some of the descs have been processed? It should break here and in other places in this loop as well. will fix it. >>+ } >>+ >>+ rte_prefetch0((void *)(uintptr_t)desc_addr); >>+ >>+ desc_offset = 0; >>+ desc_avail = desc->len; >>+ >>+ PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); >>+ } >>+ >>+ /* >>+ * This mbuf reaches to its end, get a new one >>+ * to hold more data. >>+ */ >>+ if (mbuf_avail == 0) { >>+ cur = rte_pktmbuf_alloc(mbuf_pool); >>+ if (unlikely(cur == NULL)) { >>+ RTE_LOG(ERR, VHOST_DATA, "Failed to " >>+ "allocate memory for mbuf.\n"); >>+ rc = -1; >>+ goto out; >>+ } >>+ >>+ prev->next = cur; >>+ prev->data_len = mbuf_offset; >>+ m->nb_segs += 1; >>+ m->pkt_len += mbuf_offset; >>+ prev = cur; >>+ >>+ mbuf_offset = 0; >>+ mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; >>+ } >>+ } >>+ >>+ if (hdr) >>+ vhost_dequeue_offload(hdr, m); >>+ >>+ if ((++vq->last_used_idx & (vq->size - 1)) == 0) >>+ toggle_wrap_counter(vq); >>+ >>+ rte_smp_wmb(); >>+ _set_desc_used(&descs[head_idx], wrap_counter); >>+ >>+ prev->data_len = mbuf_offset; >>+ m->pkt_len += mbuf_offset; >>+ >>+out: >>+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) >>+ vhost_user_iotlb_rd_unlock(vq); >>+ rte_spinlock_unlock(&vq->access_lock); >>+ >>+ return rc; >>+} >>+ >>+static inline uint16_t >>+vhost_dequeue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, >>+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, >>+ uint16_t count) >>+{ >>+ uint16_t i; >>+ uint16_t idx; >>+ struct vring_desc_packed *desc = vq->desc_packed; >>+ int err; >>+ >>+ count = RTE_MIN(MAX_PKT_BURST, count); >>+ for (i = 0; i < count; i++) { >>+ idx = vq->last_used_idx & (vq->size - 1); >>+ if (!desc_is_avail(vq, &desc[idx])) >>+ break; >>+ rte_smp_rmb(); >>+ >>+ pkts[i] = rte_pktmbuf_alloc(mbuf_pool); >>+ if (unlikely(pkts[i] == NULL)) { >>+ RTE_LOG(ERR, VHOST_DATA, >>+ "Failed to allocate memory for mbuf.\n"); >>+ break; >>+ } >>+ >>+ err = dequeue_desc_packed(dev, vq, mbuf_pool, pkts[i], desc); >>+ if (unlikely(err)) { >>+ rte_pktmbuf_free(pkts[i]); >>+ break; >>+ } >>+ } >>+ >>+ rte_spinlock_unlock(&vq->access_lock); >I think you forgot to remove this line. yes Thanks! regards, Jens