From mboxrd@z Thu Jan  1 00:00:00 1970
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
To: dev@dpdk.org
Cc: "Michael S. Tsirkin", Victor Kaplansky
Date: Thu, 18 Feb 2016 21:49:06 +0800
Message-Id: <1455803352-5518-2-git-send-email-yuanhan.liu@linux.intel.com>
In-Reply-To: <1455803352-5518-1-git-send-email-yuanhan.liu@linux.intel.com>
References: <1449122773-25510-1-git-send-email-yuanhan.liu@linux.intel.com>
 <1455803352-5518-1-git-send-email-yuanhan.liu@linux.intel.com>
Subject: [dpdk-dev] [PATCH v2 1/7] vhost: refactor rte_vhost_dequeue_burst

The current rte_vhost_dequeue_burst() implementation is a bit messy,
and its logic is twisted. You can also see repeated code here and
there: it invokes rte_pktmbuf_alloc() three times, at three different
places!

However, rte_vhost_dequeue_burst() actually does a simple job: copy
the packet data from the vring desc to an mbuf. What's tricky here is:

- A desc buf can be chained (by the desc->next field), so you need to
  fetch the next one once the current one is wholly drained.

- One mbuf may not be big enough to hold all the desc buf data, hence
  you need to chain mbufs as well, by the mbuf->next field.

Even so, the logic can stay simple. Here is the pseudo code:

	while (this_desc_is_not_drained_totally || has_next_desc) {
		if (this_desc_has_drained_totally) {
			this_desc = next_desc();
		}

		if (mbuf_has_no_room) {
			mbuf = allocate_a_new_mbuf();
		}

		COPY(mbuf, desc);
	}

And this is how I refactored rte_vhost_dequeue_burst().

Note that the old code does special handling to skip the virtio net
header. However, that can be done simply by adjusting the desc_avail
and desc_offset vars:

	desc_avail  = desc->len - vq->vhost_hlen;
	desc_offset = vq->vhost_hlen;

This refactoring makes the code much more readable (IMO), and it also
reduces the binary code size by nearly 2 KB.
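
For reference, a minimal sketch of how an application typically drives
this API; VIRTIO_TXQ comes from rte_virtio_net.h, while BURST_SIZE and
drain_vhost_queue() are hypothetical names used only for illustration:

	#include <rte_mbuf.h>
	#include <rte_virtio_net.h>

	/* Illustrative only: drain up to BURST_SIZE packets the guest has
	 * placed on its TX ring, then release the mbufs. */
	#define BURST_SIZE 32

	static uint32_t
	drain_vhost_queue(struct virtio_net *dev, struct rte_mempool *mbuf_pool)
	{
		struct rte_mbuf *pkts[BURST_SIZE];
		uint16_t nb_rx, i;
		uint32_t bytes = 0;

		/* VIRTIO_TXQ: index of the guest TX queue we dequeue from. */
		nb_rx = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool,
						pkts, BURST_SIZE);

		for (i = 0; i < nb_rx; i++) {
			/* Application processing would go here. */
			bytes += rte_pktmbuf_pkt_len(pkts[i]);
			rte_pktmbuf_free(pkts[i]);
		}

		return bytes;
	}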
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: - fix potential NULL dereference bug of var "prev" and "head"
---
 lib/librte_vhost/vhost_rxtx.c | 297 +++++++++++++++++-------------------------
 1 file changed, 116 insertions(+), 181 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5e7e5b1..d5cd0fa 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -702,21 +702,104 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
+static inline struct rte_mbuf *
+copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		  uint16_t desc_idx, struct rte_mempool *mbuf_pool)
+{
+	struct vring_desc *desc;
+	uint64_t desc_addr;
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct rte_mbuf *head = NULL;
+	struct rte_mbuf *cur = NULL, *prev = NULL;
+	struct virtio_net_hdr *hdr;
+
+	desc = &vq->desc[desc_idx];
+	desc_addr = gpa_to_vva(dev, desc->addr);
+	rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+	/* Retrieve virtio net header */
+	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+	desc_avail = desc->len - vq->vhost_hlen;
+	desc_offset = vq->vhost_hlen;
+
+	mbuf_avail = 0;
+	mbuf_offset = 0;
+	while (desc_avail || (desc->flags & VRING_DESC_F_NEXT) != 0) {
+		/* This desc reaches its end, get the next one */
+		if (desc_avail == 0) {
+			desc = &vq->desc[desc->next];
+
+			desc_addr = gpa_to_vva(dev, desc->addr);
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+			desc_offset = 0;
+			desc_avail = desc->len;
+
+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+		}
+
+		/*
+		 * This mbuf reaches its end, get a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(!cur)) {
+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
+					"allocate memory for mbuf.\n");
+				if (head)
+					rte_pktmbuf_free(head);
+				return NULL;
+			}
+			if (!head) {
+				head = cur;
+			} else {
+				prev->next = cur;
+				prev->data_len = mbuf_offset;
+				head->nb_segs += 1;
+			}
+			head->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
+			(void *)((uintptr_t)(desc_addr + desc_offset)),
+			cpy_len);
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+	}
+
+	if (prev) {
+		prev->data_len = mbuf_offset;
+		head->pkt_len += mbuf_offset;
+
+		if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+			vhost_dequeue_offload(hdr, head);
+	}
+
+	return head;
+}
+
 uint16_t
 rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
 {
-	struct rte_mbuf *m, *prev;
 	struct vhost_virtqueue *vq;
-	struct vring_desc *desc;
-	uint64_t vb_addr = 0;
-	uint64_t vb_net_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST];
+	uint32_t desc_indexes[MAX_PKT_BURST];
 	uint32_t used_idx;
 	uint32_t i;
-	uint16_t free_entries, entry_success = 0;
+	uint16_t free_entries;
 	uint16_t avail_idx;
-	struct virtio_net_hdr *hdr = NULL;
+	struct rte_mbuf *m;
 
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
 		RTE_LOG(ERR, VHOST_DATA,
@@ -730,197 +813,49 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		return 0;
 
 	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
-
-	/* If there are no available buffers then return. */
-	if (vq->last_used_idx == avail_idx)
+	free_entries = avail_idx - vq->last_used_idx;
+	if (free_entries == 0)
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
-		dev->device_fh);
+	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh);
 
-	/* Prefetch available ring to retrieve head indexes. */
-	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
+	used_idx = vq->last_used_idx & (vq->size - 1);
 
-	/*get the number of free entries in the ring*/
-	free_entries = (avail_idx - vq->last_used_idx);
+	/* Prefetch available ring to retrieve head indexes. */
+	rte_prefetch0(&vq->avail->ring[used_idx]);
 
-	free_entries = RTE_MIN(free_entries, count);
-	/* Limit to MAX_PKT_BURST. */
-	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
+	count = RTE_MIN(count, MAX_PKT_BURST);
+	count = RTE_MIN(count, free_entries);
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") about to dequeue %u buffers\n",
+			dev->device_fh, count);
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
-		dev->device_fh, free_entries);
 	/* Retrieve all of the head indexes first to avoid caching issues. */
-	for (i = 0; i < free_entries; i++)
-		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
+	for (i = 0; i < count; i++) {
+		desc_indexes[i] = vq->avail->ring[(vq->last_used_idx + i) &
+				(vq->size - 1)];
+	}
 
 	/* Prefetch descriptor index. */
-	rte_prefetch0(&vq->desc[head[entry_success]]);
+	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
 
-	while (entry_success < free_entries) {
-		uint32_t vb_avail, vb_offset;
-		uint32_t seg_avail, seg_offset;
-		uint32_t cpy_len;
-		uint32_t seg_num = 0;
-		struct rte_mbuf *cur;
-		uint8_t alloc_err = 0;
-
-		desc = &vq->desc[head[entry_success]];
-
-		vb_net_hdr_addr = gpa_to_vva(dev, desc->addr);
-		hdr = (struct virtio_net_hdr *)((uintptr_t)vb_net_hdr_addr);
-
-		/* Discard first buffer as it is the virtio header */
-		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc = &vq->desc[desc->next];
-			vb_offset = 0;
-			vb_avail = desc->len;
-		} else {
-			vb_offset = vq->vhost_hlen;
-			vb_avail = desc->len - vb_offset;
-		}
-
-		/* Buffer address translation. */
-		vb_addr = gpa_to_vva(dev, desc->addr);
-		/* Prefetch buffer address. */
-		rte_prefetch0((void *)(uintptr_t)vb_addr);
-
-		used_idx = vq->last_used_idx & (vq->size - 1);
-
-		if (entry_success < (free_entries - 1)) {
-			/* Prefetch descriptor index. */
-			rte_prefetch0(&vq->desc[head[entry_success+1]]);
-			rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
-		}
-
-		/* Update used index buffer information. */
-		vq->used->ring[used_idx].id = head[entry_success];
-		vq->used->ring[used_idx].len = 0;
-
-		/* Allocate an mbuf and populate the structure. */
-		m = rte_pktmbuf_alloc(mbuf_pool);
-		if (unlikely(m == NULL)) {
-			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
-			break;
-		}
-		seg_offset = 0;
-		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
-		cpy_len = RTE_MIN(vb_avail, seg_avail);
-
-		PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
-
-		seg_num++;
-		cur = m;
-		prev = m;
-		while (cpy_len != 0) {
-			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, seg_offset),
-				(void *)((uintptr_t)(vb_addr + vb_offset)),
-				cpy_len);
-
-			seg_offset += cpy_len;
-			vb_offset += cpy_len;
-			vb_avail -= cpy_len;
-			seg_avail -= cpy_len;
-
-			if (vb_avail != 0) {
-				/*
-				 * The segment reachs to its end,
-				 * while the virtio buffer in TX vring has
-				 * more data to be copied.
-				 */
-				cur->data_len = seg_offset;
-				m->pkt_len += seg_offset;
-				/* Allocate mbuf and populate the structure. */
-				cur = rte_pktmbuf_alloc(mbuf_pool);
-				if (unlikely(cur == NULL)) {
-					RTE_LOG(ERR, VHOST_DATA, "Failed to "
-						"allocate memory for mbuf.\n");
-					rte_pktmbuf_free(m);
-					alloc_err = 1;
-					break;
-				}
-
-				seg_num++;
-				prev->next = cur;
-				prev = cur;
-				seg_offset = 0;
-				seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
-			} else {
-				if (desc->flags & VRING_DESC_F_NEXT) {
-					/*
-					 * There are more virtio buffers in
-					 * same vring entry need to be copied.
-					 */
-					if (seg_avail == 0) {
-						/*
-						 * The current segment hasn't
-						 * room to accomodate more
-						 * data.
-						 */
-						cur->data_len = seg_offset;
-						m->pkt_len += seg_offset;
-						/*
-						 * Allocate an mbuf and
-						 * populate the structure.
-						 */
-						cur = rte_pktmbuf_alloc(mbuf_pool);
-						if (unlikely(cur == NULL)) {
-							RTE_LOG(ERR,
-								VHOST_DATA,
-								"Failed to "
-								"allocate memory "
-								"for mbuf\n");
-							rte_pktmbuf_free(m);
-							alloc_err = 1;
-							break;
-						}
-						seg_num++;
-						prev->next = cur;
-						prev = cur;
-						seg_offset = 0;
-						seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
-					}
-
-					desc = &vq->desc[desc->next];
-
-					/* Buffer address translation. */
-					vb_addr = gpa_to_vva(dev, desc->addr);
-					/* Prefetch buffer address. */
-					rte_prefetch0((void *)(uintptr_t)vb_addr);
-					vb_offset = 0;
-					vb_avail = desc->len;
-
-					PRINT_PACKET(dev, (uintptr_t)vb_addr,
-						desc->len, 0);
-				} else {
-					/* The whole packet completes. */
-					cur->data_len = seg_offset;
-					m->pkt_len += seg_offset;
-					vb_avail = 0;
-				}
-			}
-
-			cpy_len = RTE_MIN(vb_avail, seg_avail);
-		}
-
-		if (unlikely(alloc_err == 1))
+	for (i = 0; i < count; i++) {
+		m = copy_desc_to_mbuf(dev, vq, desc_indexes[i], mbuf_pool);
+		if (m == NULL)
 			break;
+		pkts[i] = m;
 
-		m->nb_segs = seg_num;
-		if ((hdr->flags != 0) || (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE))
-			vhost_dequeue_offload(hdr, m);
-
-		pkts[entry_success] = m;
-		vq->last_used_idx++;
-		entry_success++;
+		used_idx = vq->last_used_idx++ & (vq->size - 1);
+		vq->used->ring[used_idx].id = desc_indexes[i];
+		vq->used->ring[used_idx].len = 0;
 	}
 
 	rte_compiler_barrier();
-	vq->used->idx += entry_success;
+	vq->used->idx += i;
+
 	/* Kick guest if required. */
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
 		eventfd_write(vq->callfd, (eventfd_t)1);
-	return entry_success;
+
+	return i;
 }
-- 
1.9.0