From: Zhihong Wang
To: dev@dpdk.org
Cc: Zhihong Wang
Date: Mon, 15 Aug 2016 23:50:02 -0400
Message-Id: <1471319402-112998-1-git-send-email-zhihong.wang@intel.com>
X-Mailer: git-send-email 2.7.4
Subject: [dpdk-dev] [PATCH] optimize vhost enqueue

This patch optimizes the vhost enqueue function: rte_vhost_enqueue_burst.

Currently there are 2 callbacks for vhost enqueue:
 *  virtio_dev_merge_rx for cases where mrg_rxbuf is turned on.
 *  virtio_dev_rx for cases where mrg_rxbuf is turned off.

virtio_dev_merge_rx doesn't provide optimal performance, and it has been
reported to have a compatibility issue with Windows VMs. Besides, keeping
2 separate functions increases the maintenance effort.

This patch replaces the current 2 functions with a single code path for
better maintainability, and provides better performance by optimizing
cache behavior, especially for the mrg_rxbuf turned on cases. It also
fixes the compatibility issue with Windows VMs.

Signed-off-by: Zhihong Wang
---
(A simplified, self-contained sketch of the shadow used ring flush is
appended after the patch, for reviewers.)

 lib/librte_vhost/vhost-net.h  |   6 +-
 lib/librte_vhost/vhost_rxtx.c | 582 ++++++++++++++----------------------------
 lib/librte_vhost/virtio-net.c |  15 +-
 3 files changed, 208 insertions(+), 395 deletions(-)

diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 38593a2..a15182c 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -71,7 +71,7 @@ struct vhost_virtqueue {
 	uint32_t		size;
 
 	/* Last index used on the available ring */
-	volatile uint16_t	last_used_idx;
+	uint16_t		last_used_idx;
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 
@@ -85,6 +85,10 @@ struct vhost_virtqueue {
 
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
+
+	/* Shadow used ring for performance */
+	struct vring_used_elem	*shadow_used_ring;
+	uint32_t		shadow_used_idx;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macro defined */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..1263168 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
 	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }
 
-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
 	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -125,427 +125,227 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 	}
 }
 
-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
-		struct virtio_net_hdr_mrg_rxbuf hdr)
-{
-	if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
-		*(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
-	else
-		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
-}
-
-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		struct rte_mbuf *m, uint16_t desc_idx)
+uint16_t
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint16_t count)
 {
-	uint32_t desc_avail, desc_offset;
-	uint32_t mbuf_avail, mbuf_offset;
-	uint32_t cpy_len;
+	struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
+	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
-	uint64_t desc_addr;
-	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-
-	desc = &vq->desc[desc_idx];
-	desc_addr = gpa_to_vva(dev, desc->addr);
-	/*
-	 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-	 * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-	 * otherwise stores offset on the stack instead of in a register.
-	 */
-	if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
-		return -1;
-
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-	vhost_log_write(dev, desc->addr, dev->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
-	desc_offset = dev->vhost_hlen;
-	desc_avail = desc->len - dev->vhost_hlen;
-
-	mbuf_avail = rte_pktmbuf_data_len(m);
-	mbuf_offset = 0;
-	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current mbuf, fetch next */
-		if (mbuf_avail == 0) {
-			m = m->next;
-
-			mbuf_offset = 0;
-			mbuf_avail = rte_pktmbuf_data_len(m);
-		}
-
-		/* done with current desc buf, fetch next */
-		if (desc_avail == 0) {
-			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
-				/* Room in vring buffer is not enough */
-				return -1;
-			}
-			if (unlikely(desc->next >= vq->size))
-				return -1;
-
-			desc = &vq->desc[desc->next];
-			desc_addr = gpa_to_vva(dev, desc->addr);
-			if (unlikely(!desc_addr))
-				return -1;
-
-			desc_offset = 0;
-			desc_avail = desc->len;
-		}
-
-		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
-			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-			cpy_len);
-		vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
-		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-			     cpy_len, 0);
-
-		mbuf_avail -= cpy_len;
-		mbuf_offset += cpy_len;
-		desc_avail -= cpy_len;
-		desc_offset += cpy_len;
-	}
-
-	return 0;
-}
+	struct virtio_net *dev;
+	struct rte_mbuf *mbuf;
+	uint64_t desc_host_write_addr = 0;
+	uint32_t desc_chain_head = 0;
+	uint32_t desc_chain_len = 0;
+	uint32_t desc_current = 0;
+	uint32_t desc_write_offset = 0;
+	uint32_t used_idx_static = 0;
+	uint32_t pkt_idx = 0;
+	uint32_t pkt_left = 0;
+	uint32_t pkt_sent = 0;
+	uint32_t mbuf_len = 0;
+	uint32_t mbuf_len_left = 0;
+	uint32_t copy_len = 0;
+	uint32_t copy_virtio_hdr = 0;
+	uint32_t is_mrg_rxbuf = 0;
+	uint32_t is_virtio_1 = 0;
+
+	if (unlikely(count == 0))
+		return 0;
 
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint32_t count)
-{
-	struct vhost_virtqueue *vq;
-	uint16_t avail_idx, free_entries, start_idx;
-	uint16_t desc_indexes[MAX_PKT_BURST];
-	uint16_t used_idx;
-	uint32_t i;
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 
-	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
+	dev = get_device(vid);
+	if (unlikely(!dev))
 		return 0;
-	}
 
-	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb)))
 		return 0;
 
-	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-	start_idx = vq->last_used_idx;
-	free_entries = avail_idx - start_idx;
-	count = RTE_MIN(count, free_entries);
-	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-	if (count == 0)
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(!vq->enabled))
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
-		dev->vid, start_idx, start_idx + count);
+	if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+		is_mrg_rxbuf = 1;
+
+	if (dev->features & (1ULL << VIRTIO_F_VERSION_1))
+		is_virtio_1 = 1;
+
+	pkt_idx = 0;
+	pkt_left = count;
+	used_idx_static = vq->last_used_idx & (vq->size - 1);
+	vq->shadow_used_idx = 0;
+
+	while (pkt_left > 0) {
+		if (unlikely(vq->avail->idx == vq->last_used_idx))
+			goto done;
+
+		if (pkt_left > 1 && vq->avail->idx != vq->last_used_idx + 1)
+			rte_prefetch0(&vq->desc[
+					vq->avail->ring[
+					(vq->last_used_idx + 1) &
+					(vq->size - 1)]]);
+
+		mbuf = pkts[pkt_idx];
+		mbuf_len = rte_pktmbuf_data_len(mbuf);
+		mbuf_len_left = mbuf_len;
+		pkt_idx++;
+		pkt_left--;
+
+		desc_chain_head = vq->avail->ring[(vq->last_used_idx) &
+				(vq->size - 1)];
+		desc_current = desc_chain_head;
+		desc = &vq->desc[desc_current];
+		desc_host_write_addr = gpa_to_vva(dev, desc->addr);
+		if (unlikely(!desc_host_write_addr))
+			goto done;
+
+		virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
+				(uintptr_t)desc_host_write_addr;
+		copy_virtio_hdr = 1;
+
+		vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+		desc_write_offset = dev->vhost_hlen;
+		desc_chain_len = desc_write_offset;
+		desc_host_write_addr += desc_write_offset;
+
+		while (1) {
+			if (!mbuf_len_left) {
+				if (mbuf->next) {
+					mbuf = mbuf->next;
+					mbuf_len = rte_pktmbuf_data_len(mbuf);
+					mbuf_len_left = mbuf_len;
+				} else
+					break;
+			}
 
-	/* Retrieve all of the desc indexes first to avoid caching issues. */
-	rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
-	for (i = 0; i < count; i++) {
-		used_idx = (start_idx + i) & (vq->size - 1);
-		desc_indexes[i] = vq->avail->ring[used_idx];
-		vq->used->ring[used_idx].id = desc_indexes[i];
-		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
-			dev->vhost_hlen;
-		vhost_log_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
-	}
+			if (desc->len <= desc_write_offset) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc_write_offset = 0;
+					desc_current = desc->next;
+					desc = &vq->desc[desc_current];
+					desc_host_write_addr =
+						gpa_to_vva(dev, desc->addr);
+					if (unlikely(!desc_host_write_addr))
+						goto rollback;
+				} else if (is_mrg_rxbuf) {
+					vq->shadow_used_ring[
+						vq->shadow_used_idx].id =
+						desc_chain_head;
+					vq->shadow_used_ring[
+						vq->shadow_used_idx].len =
+						desc_chain_len;
+					vq->shadow_used_idx++;
+					vq->last_used_idx++;
+					virtio_hdr->num_buffers++;
+					if (unlikely(vq->avail->idx ==
+						vq->last_used_idx))
+						goto rollback;
+
+					desc_chain_head = vq->avail->ring[
+						(vq->last_used_idx) &
+						(vq->size - 1)];
+					desc_current = desc_chain_head;
+					desc = &vq->desc[desc_current];
+					desc_host_write_addr =
+						gpa_to_vva(dev, desc->addr);
+					if (unlikely(!desc_host_write_addr))
+						goto rollback;
+
+					desc_chain_len = 0;
+					desc_write_offset = 0;
+				} else
+					goto rollback;
+			}
 
-	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-	for (i = 0; i < count; i++) {
-		uint16_t desc_idx = desc_indexes[i];
-		int err;
+			copy_len = RTE_MIN(desc->len - desc_write_offset,
+					mbuf_len_left);
+			if (copy_virtio_hdr) {
+				copy_virtio_hdr = 0;
+				memset((void *)(uintptr_t)&(virtio_hdr->hdr),
+						0, dev->vhost_hlen);
+				virtio_enqueue_offload(mbuf,
+						&(virtio_hdr->hdr));
+				if (is_mrg_rxbuf || is_virtio_1)
+					virtio_hdr->num_buffers = 1;
+			}
 
-		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
-		if (unlikely(err)) {
-			used_idx = (start_idx + i) & (vq->size - 1);
-			vq->used->ring[used_idx].len = dev->vhost_hlen;
-			vhost_log_used_vring(dev, vq,
-				offsetof(struct vring_used, ring[used_idx]),
-				sizeof(vq->used->ring[used_idx]));
+			rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
+					rte_pktmbuf_mtod_offset(mbuf, void *,
+						mbuf_len - mbuf_len_left),
+					copy_len);
+			vhost_log_write(dev, desc->addr + desc_write_offset,
+					copy_len);
+			mbuf_len_left -= copy_len;
+			desc_write_offset += copy_len;
+			desc_host_write_addr += copy_len;
+			desc_chain_len += copy_len;
 		}
 
-		if (i + 1 < count)
-			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+		vq->shadow_used_ring[vq->shadow_used_idx].id = desc_chain_head;
+		vq->shadow_used_ring[vq->shadow_used_idx].len = desc_chain_len;
+		vq->shadow_used_idx++;
+		vq->last_used_idx++;
+		pkt_sent++;
 	}
 
-	rte_smp_wmb();
-
-	*(volatile uint16_t *)&vq->used->idx += count;
-	vq->last_used_idx += count;
-	vhost_log_used_vring(dev, vq,
-		offsetof(struct vring_used, idx),
-		sizeof(vq->used->idx));
-
-	/* flush used->idx update before we read avail->flags. */
-	rte_mb();
-
-	/* Kick the guest if necessary. */
-	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-			&& (vq->callfd >= 0))
-		eventfd_write(vq->callfd, (eventfd_t)1);
-	return count;
-}
-
-static inline int
-fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
-	     uint32_t *allocated, uint32_t *vec_idx,
-	     struct buf_vector *buf_vec)
-{
-	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-	uint32_t vec_id = *vec_idx;
-	uint32_t len = *allocated;
-
-	while (1) {
-		if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
-			return -1;
-
-		len += vq->desc[idx].len;
-		buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
-		buf_vec[vec_id].buf_len = vq->desc[idx].len;
-		buf_vec[vec_id].desc_idx = idx;
-		vec_id++;
-
-		if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
-			break;
-
-		idx = vq->desc[idx].next;
-	}
-
-	*allocated = len;
-	*vec_idx = vec_id;
-
-	return 0;
-}
-
-/*
- * Returns -1 on fail, 0 on success
- */
-static inline int
-reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-			    uint16_t *end, struct buf_vector *buf_vec)
-{
-	uint16_t cur_idx;
-	uint16_t avail_idx;
-	uint32_t allocated = 0;
-	uint32_t vec_idx = 0;
-	uint16_t tries = 0;
-
-	cur_idx = vq->last_used_idx;
-
-	while (1) {
-		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-		if (unlikely(cur_idx == avail_idx))
-			return -1;
-
-		if (unlikely(fill_vec_buf(vq, cur_idx, &allocated,
-					  &vec_idx, buf_vec) < 0))
-			return -1;
-
-		cur_idx++;
-		tries++;
-
-		if (allocated >= size)
-			break;
-
-		/*
-		 * if we tried all available ring items, and still
-		 * can't get enough buf, it means something abnormal
-		 * happened.
-		 */
-		if (unlikely(tries >= vq->size))
-			return -1;
-	}
-
-	*end = cur_idx;
-	return 0;
-}
-
-static inline uint32_t __attribute__((always_inline))
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint16_t end_idx, struct rte_mbuf *m,
-			    struct buf_vector *buf_vec)
-{
-	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-	uint32_t vec_idx = 0;
-	uint16_t start_idx = vq->last_used_idx;
-	uint16_t cur_idx = start_idx;
-	uint64_t desc_addr;
-	uint32_t mbuf_offset, mbuf_avail;
-	uint32_t desc_offset, desc_avail;
-	uint32_t cpy_len;
-	uint16_t desc_idx, used_idx;
-
-	if (unlikely(m == NULL))
-		return 0;
-
-	LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
-		dev->vid, cur_idx, end_idx);
-
-	desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
-	if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
-		return 0;
-
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-	virtio_hdr.num_buffers = end_idx - start_idx;
-	LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
-		dev->vid, virtio_hdr.num_buffers);
-
-	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-	vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
-	desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
-	desc_offset = dev->vhost_hlen;
-
-	mbuf_avail = rte_pktmbuf_data_len(m);
-	mbuf_offset = 0;
-	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current desc buf, get the next one */
-		if (desc_avail == 0) {
-			desc_idx = buf_vec[vec_idx].desc_idx;
-
-			if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
-				/* Update used ring with desc information */
-				used_idx = cur_idx++ & (vq->size - 1);
-				vq->used->ring[used_idx].id = desc_idx;
-				vq->used->ring[used_idx].len = desc_offset;
-				vhost_log_used_vring(dev, vq,
+done:
+	if (likely(vq->shadow_used_idx > 0)) {
+		if (used_idx_static + vq->shadow_used_idx < vq->size) {
+			rte_memcpy(&vq->used->ring[used_idx_static],
+					&vq->shadow_used_ring[0],
+					vq->shadow_used_idx *
+					sizeof(struct vring_used_elem));
+			vhost_log_used_vring(dev, vq,
 					offsetof(struct vring_used,
-						ring[used_idx]),
-					sizeof(vq->used->ring[used_idx]));
-			}
-
-			vec_idx++;
-			desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
-			if (unlikely(!desc_addr))
-				return 0;
-
-			/* Prefetch buffer address. */
-			rte_prefetch0((void *)(uintptr_t)desc_addr);
-			desc_offset = 0;
-			desc_avail = buf_vec[vec_idx].buf_len;
-		}
-
-		/* done with current mbuf, get the next one */
-		if (mbuf_avail == 0) {
-			m = m->next;
+					ring[used_idx_static]),
+					vq->shadow_used_idx *
+					sizeof(struct vring_used_elem));
+		} else {
+			uint32_t part_1 = vq->size - used_idx_static;
+			uint32_t part_2 = vq->shadow_used_idx - part_1;
 
-			mbuf_offset = 0;
-			mbuf_avail = rte_pktmbuf_data_len(m);
+			rte_memcpy(&vq->used->ring[used_idx_static],
+					&vq->shadow_used_ring[0],
+					part_1 *
+					sizeof(struct vring_used_elem));
+			vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+					ring[used_idx_static]),
+					part_1 *
+					sizeof(struct vring_used_elem));
+			rte_memcpy(&vq->used->ring[0],
+					&vq->shadow_used_ring[part_1],
+					part_2 *
+					sizeof(struct vring_used_elem));
+			vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+					ring[0]),
+					part_2 *
+					sizeof(struct vring_used_elem));
 		}
-
-		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
-			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-			cpy_len);
-		vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
-			cpy_len);
-		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-			cpy_len, 0);
-
-		mbuf_avail -= cpy_len;
-		mbuf_offset += cpy_len;
-		desc_avail -= cpy_len;
-		desc_offset += cpy_len;
 	}
 
-	used_idx = cur_idx & (vq->size - 1);
-	vq->used->ring[used_idx].id = buf_vec[vec_idx].desc_idx;
-	vq->used->ring[used_idx].len = desc_offset;
+	rte_smp_wmb();
+	vq->used->idx = vq->last_used_idx;
 	vhost_log_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
-
-	return end_idx - start_idx;
-}
-
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint32_t count)
-{
-	struct vhost_virtqueue *vq;
-	uint32_t pkt_idx = 0, nr_used = 0;
-	uint16_t end;
-	struct buf_vector buf_vec[BUF_VECTOR_MAX];
-
-	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
-		return 0;
-
-	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-	if (count == 0)
-		return 0;
-
-	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
-
-		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-							 &end, buf_vec) < 0)) {
-			LOG_DEBUG(VHOST_DATA,
-				"(%d) failed to get enough desc from vring\n",
-				dev->vid);
-			break;
-		}
-
-		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
-						      pkts[pkt_idx], buf_vec);
-		rte_smp_wmb();
-
-		*(volatile uint16_t *)&vq->used->idx += nr_used;
-		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
-		vq->last_used_idx += nr_used;
-	}
-
-	if (likely(pkt_idx)) {
-		/* flush used->idx update before we read avail->flags. */
-		rte_mb();
-
-		/* Kick the guest if necessary. */
+	rte_mb();
+	if (likely(pkt_sent)) {
 		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 				&& (vq->callfd >= 0))
 			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
-	return pkt_idx;
-}
-
-uint16_t
-rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint16_t count)
-{
-	struct virtio_net *dev = get_device(vid);
+	return pkt_sent;
 
-	if (!dev)
-		return 0;
+rollback:
+	if (is_mrg_rxbuf || is_virtio_1)
+		vq->last_used_idx -= virtio_hdr->num_buffers - 1;
 
-	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
+	goto done;
 }
 
 static void
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 1785695..87d09fa 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -152,10 +152,14 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 free_device(struct virtio_net *dev)
 {
+	struct vhost_virtqueue *vq;
 	uint32_t i;
 
-	for (i = 0; i < dev->virt_qp_nb; i++)
-		rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		vq = dev->virtqueue[i * VIRTIO_QNUM];
+		rte_free(vq->shadow_used_ring);
+		rte_free(vq);
+	}
 
 	rte_free(dev);
 }
@@ -418,13 +422,18 @@ int
 vhost_set_vring_num(int vid, struct vhost_vring_state *state)
 {
 	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
 
 	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
 	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
-	dev->virtqueue[state->index]->size = state->num;
+	vq = dev->virtqueue[state->index];
+	vq->size = state->num;
+	vq->shadow_used_ring = rte_malloc("",
+			vq->size * sizeof(struct vring_used_elem),
+			RTE_CACHE_LINE_SIZE);
 
 	return 0;
 }
-- 
2.7.4
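
P.S. For reviewers who want the shadow used ring idea in isolation: below
is a minimal, self-contained sketch of the batched used ring flush done at
the 'done:' label above. The type (used_elem), ring size (VQ_SIZE), and
helper name (flush_shadow) are invented for illustration only; plain
memcpy stands in for rte_memcpy, and the sketch omits the vhost dirty-page
logging, the rte_smp_wmb() barrier, and the final used->idx store that the
real code performs.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct vring_used_elem (illustration only). */
struct used_elem {
	uint32_t id;
	uint32_t len;
};

#define VQ_SIZE 8 /* ring size, must be a power of two */

static struct used_elem used_ring[VQ_SIZE]; /* guest-visible used ring */
static struct used_elem shadow[VQ_SIZE];    /* host-local shadow copy */

/*
 * Flush n shadow entries into the used ring starting at slot 'start'.
 * One bulk copy when the batch fits before the end of the ring, two
 * copies when it wraps -- the same part_1/part_2 split as in the patch.
 */
static void
flush_shadow(uint32_t start, uint32_t n)
{
	if (start + n <= VQ_SIZE) {
		memcpy(&used_ring[start], &shadow[0],
				n * sizeof(struct used_elem));
	} else {
		uint32_t part_1 = VQ_SIZE - start;
		uint32_t part_2 = n - part_1;

		memcpy(&used_ring[start], &shadow[0],
				part_1 * sizeof(struct used_elem));
		memcpy(&used_ring[0], &shadow[part_1],
				part_2 * sizeof(struct used_elem));
	}
}

int
main(void)
{
	uint32_t i;

	/* Pretend 5 chains were filled while last_used_idx % size was 6. */
	for (i = 0; i < 5; i++) {
		shadow[i].id = i;
		shadow[i].len = 64;
	}
	flush_shadow(6, 5); /* wraps: lands in slots 6, 7, 0, 1, 2 */

	for (i = 0; i < VQ_SIZE; i++)
		printf("used_ring[%u] = { id %u, len %u }\n",
				i, used_ring[i].id, used_ring[i].len);
	return 0;
}

The point of the shadow ring is that a burst touches the guest-visible
used ring with at most two bulk copies and a single used->idx update,
instead of one per-descriptor-chain write, which is where most of the
cache-behavior win described in the commit message comes from.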