From mboxrd@z Thu Jan 1 00:00:00 1970
From: Huawei Xie
To: dev@dpdk.org
Date: Fri, 26 Sep 2014 17:45:51 +0800
Message-Id: <1411724758-27488-5-git-send-email-huawei.xie@intel.com>
X-Mailer: git-send-email 1.7.4.1
In-Reply-To: <1411724758-27488-1-git-send-email-huawei.xie@intel.com>
References: <1411724758-27488-1-git-send-email-huawei.xie@intel.com>
Subject: [dpdk-dev] [PATCH v5 04/11] lib/librte_vhost: merge vhost merge-able rx. merge vhost tx fix.
List-Id: patches and discussions about DPDK

Merge the vhost mergeable RX path.

For vhost TX, the earlier mergeable-buffer feature introduced
virtio_dev_merge_tx and called virtio_dev_tx or virtio_dev_merge_tx
depending on whether the vhost device supports the mergeable feature.
There is no real "merge TX"; it is actually a fix for the memcpy from a
chained vring descriptor to a chained mbuf. Use virtio_dev_merge_tx as
the base for vhost TX.

Signed-off-by: Huawei Xie
---
 lib/librte_vhost/rte_virtio_net.h |  16 +-
 lib/librte_vhost/vhost_rxtx.c     | 568 +++++++++++++++++++++++++++++++++-----
 2 files changed, 511 insertions(+), 73 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 08dc6f4..99ddfc1 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -53,9 +53,18 @@
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
-
-/*
- * Structure contains variables relevant to TX/RX virtqueues.
+#define BUF_VECTOR_MAX 256
+/**
+ * Structure contains buffer address, length and descriptor index
+ * from vring to do scatter RX.
+ */
+struct buf_vector {
+	uint64_t buf_addr;
+	uint32_t buf_len;
+	uint32_t desc_idx;
+};
+/**
+ * Structure contains variables relevant to RX/TX virtqueues.
  */
 struct vhost_virtqueue
 {
@@ -69,6 +78,7 @@ struct vhost_virtqueue
 	volatile uint16_t	last_used_idx_res;	/* Used for multiple devices reserving buffers. */
 	eventfd_t		callfd;			/* Currently unused as polling mode is enabled. */
 	eventfd_t		kickfd;			/* Used to notify the guest (trigger interrupt). */
+	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX.
*/ } __rte_cache_aligned; diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c index 0d96c43..81368e6 100644 --- a/lib/librte_vhost/vhost_rxtx.c +++ b/lib/librte_vhost/vhost_rxtx.c @@ -49,8 +49,8 @@ * count is returned to indicate the number of packets that were succesfully * added to the RX queue. This function works when mergeable is disabled. */ -uint32_t -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) +static inline uint32_t __attribute__((always_inline)) +virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; struct vring_desc *desc; @@ -61,7 +61,6 @@ rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mb uint64_t buff_hdr_addr = 0; uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0; uint32_t head_idx, packet_success = 0; - uint32_t mergeable, mrg_count = 0; uint16_t avail_idx, res_cur_idx; uint16_t res_base_idx, res_end_idx; uint16_t free_entries; @@ -101,9 +100,6 @@ rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mb /* Prefetch available ring to retrieve indexes. */ rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]); - /* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */ - mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF); - /* Retrieve all of the head indexes first to avoid caching issues. */ for (head_idx = 0; head_idx < count; head_idx++) head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)]; @@ -122,27 +118,23 @@ rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mb /* Prefetch buffer address. */ rte_prefetch0((void*)(uintptr_t)buff_addr); - if (mergeable && (mrg_count != 0)) { - desc->len = packet_len = rte_pktmbuf_data_len(buff); + /* Copy virtio_hdr to packet and increment buffer address */ + buff_hdr_addr = buff_addr; + packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen; + + /* + * If the descriptors are chained the header and data are placed in + * separate buffers. + */ + if (desc->flags & VRING_DESC_F_NEXT) { + desc->len = vq->vhost_hlen; + desc = &vq->desc[desc->next]; + /* Buffer address translation. */ + buff_addr = gpa_to_vva(dev, desc->addr); + desc->len = rte_pktmbuf_data_len(buff); } else { - /* Copy virtio_hdr to packet and increment buffer address */ - buff_hdr_addr = buff_addr; - packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen; - - /* - * If the descriptors are chained the header and data are placed in - * separate buffers. - */ - if (desc->flags & VRING_DESC_F_NEXT) { - desc->len = vq->vhost_hlen; - desc = &vq->desc[desc->next]; - /* Buffer address translation. */ - buff_addr = gpa_to_vva(dev, desc->addr); - desc->len = rte_pktmbuf_data_len(buff); - } else { - buff_addr += vq->vhost_hlen; - desc->len = packet_len; - } + buff_addr += vq->vhost_hlen; + desc->len = packet_len; } @@ -161,21 +153,9 @@ rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mb res_cur_idx++; packet_success++; - /* If mergeable is disabled then a header is required per buffer. */ - if (!mergeable) { - rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen); - VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1); - } else { - mrg_count++; - /* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. 
*/ - if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) { - virtio_hdr.num_buffers = mrg_count; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers); - rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen); - VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1); - mrg_count = 0; - } - } + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, + vq->vhost_hlen); + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1); if (res_cur_idx < res_end_idx) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); @@ -197,18 +177,357 @@ rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mb return count; } +static inline uint32_t __attribute__((always_inline)) +copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, + uint16_t res_end_idx, struct rte_mbuf *pkt) +{ + uint32_t vec_idx = 0; + uint32_t entry_success = 0; + struct vhost_virtqueue *vq; + /* The virtio_hdr is initialised to 0. */ + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = { + {0, 0, 0, 0, 0, 0}, 0}; + uint16_t cur_idx = res_base_idx; + uint64_t vb_addr = 0; + uint64_t vb_hdr_addr = 0; + uint32_t seg_offset = 0; + uint32_t vb_offset = 0; + uint32_t seg_avail; + uint32_t vb_avail; + uint32_t cpy_len, entry_len; + + if (pkt == NULL) + return 0; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| " + "End Index %d\n", + dev->device_fh, cur_idx, res_end_idx); + + /* + * Convert from gpa to vva + * (guest physical addr -> vhost virtual addr) + */ + vq = dev->virtqueue[VIRTIO_RXQ]; + vb_addr = + gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); + vb_hdr_addr = vb_addr; + + /* Prefetch buffer address. */ + rte_prefetch0((void *)(uintptr_t)vb_addr); + + virtio_hdr.num_buffers = res_end_idx - res_base_idx; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", + dev->device_fh, virtio_hdr.num_buffers); + + rte_memcpy((void *)(uintptr_t)vb_hdr_addr, + (const void *)&virtio_hdr, vq->vhost_hlen); + + VHOST_PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1); + + seg_avail = rte_pktmbuf_data_len(pkt); + vb_offset = vq->vhost_hlen; + vb_avail = + vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen; + + entry_len = vq->vhost_hlen; + + if (vb_avail == 0) { + uint32_t desc_idx = + vq->buf_vec[vec_idx].desc_idx; + vq->desc[desc_idx].len = vq->vhost_hlen; + + if ((vq->desc[desc_idx].flags + & VRING_DESC_F_NEXT) == 0) { + /* Update used ring with desc information */ + vq->used->ring[cur_idx & (vq->size - 1)].id + = vq->buf_vec[vec_idx].desc_idx; + vq->used->ring[cur_idx & (vq->size - 1)].len + = entry_len; + + entry_len = 0; + cur_idx++; + entry_success++; + } + + vec_idx++; + vb_addr = + gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); + + /* Prefetch buffer address. */ + rte_prefetch0((void *)(uintptr_t)vb_addr); + vb_offset = 0; + vb_avail = vq->buf_vec[vec_idx].buf_len; + } + + cpy_len = RTE_MIN(vb_avail, seg_avail); + + while (cpy_len > 0) { + /* Copy mbuf data to vring buffer */ + rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset), + (const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset), + cpy_len); + + VHOST_PRINT_PACKET(dev, + (uintptr_t)(vb_addr + vb_offset), + cpy_len, 0); + + seg_offset += cpy_len; + vb_offset += cpy_len; + seg_avail -= cpy_len; + vb_avail -= cpy_len; + entry_len += cpy_len; + + if (seg_avail != 0) { + /* + * The virtio buffer in this vring + * entry reach to its end. 
+ * But the segment doesn't complete. + */ + if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags & + VRING_DESC_F_NEXT) == 0) { + /* Update used ring with desc information */ + vq->used->ring[cur_idx & (vq->size - 1)].id + = vq->buf_vec[vec_idx].desc_idx; + vq->used->ring[cur_idx & (vq->size - 1)].len + = entry_len; + entry_len = 0; + cur_idx++; + entry_success++; + } + + vec_idx++; + vb_addr = gpa_to_vva(dev, + vq->buf_vec[vec_idx].buf_addr); + vb_offset = 0; + vb_avail = vq->buf_vec[vec_idx].buf_len; + cpy_len = RTE_MIN(vb_avail, seg_avail); + } else { + /* + * This current segment complete, need continue to + * check if the whole packet complete or not. + */ + pkt = pkt->pkt.next; + if (pkt != NULL) { + /* + * There are more segments. + */ + if (vb_avail == 0) { + /* + * This current buffer from vring is + * used up, need fetch next buffer + * from buf_vec. + */ + uint32_t desc_idx = + vq->buf_vec[vec_idx].desc_idx; + vq->desc[desc_idx].len = vb_offset; + + if ((vq->desc[desc_idx].flags & + VRING_DESC_F_NEXT) == 0) { + uint16_t wrapped_idx = + cur_idx & (vq->size - 1); + /* + * Update used ring with the + * descriptor information + */ + vq->used->ring[wrapped_idx].id + = desc_idx; + vq->used->ring[wrapped_idx].len + = entry_len; + entry_success++; + entry_len = 0; + cur_idx++; + } + + /* Get next buffer from buf_vec. */ + vec_idx++; + vb_addr = gpa_to_vva(dev, + vq->buf_vec[vec_idx].buf_addr); + vb_avail = + vq->buf_vec[vec_idx].buf_len; + vb_offset = 0; + } + + seg_offset = 0; + seg_avail = rte_pktmbuf_data_len(pkt); + cpy_len = RTE_MIN(vb_avail, seg_avail); + } else { + /* + * This whole packet completes. + */ + uint32_t desc_idx = + vq->buf_vec[vec_idx].desc_idx; + vq->desc[desc_idx].len = vb_offset; + + while (vq->desc[desc_idx].flags & + VRING_DESC_F_NEXT) { + desc_idx = vq->desc[desc_idx].next; + vq->desc[desc_idx].len = 0; + } + + /* Update used ring with desc information */ + vq->used->ring[cur_idx & (vq->size - 1)].id + = vq->buf_vec[vec_idx].desc_idx; + vq->used->ring[cur_idx & (vq->size - 1)].len + = entry_len; + entry_len = 0; + cur_idx++; + entry_success++; + seg_avail = 0; + cpy_len = RTE_MIN(vb_avail, seg_avail); + } + } + } + + return entry_success; +} + +/* + * This function works for mergeable RX. + */ +static inline uint32_t __attribute__((always_inline)) +virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, + uint32_t count) +{ + struct vhost_virtqueue *vq; + uint32_t pkt_idx = 0, entry_success = 0; + uint16_t avail_idx, res_cur_idx; + uint16_t res_base_idx, res_end_idx; + uint8_t success = 0; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n", + dev->device_fh); + if (unlikely(queue_id != VIRTIO_RXQ)) { + LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); + } + + vq = dev->virtqueue[VIRTIO_RXQ]; + count = RTE_MIN((uint32_t)VHOST_MAX_PKT_BURST, count); + + if (count == 0) + return 0; + + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { + uint32_t secure_len = 0; + uint16_t need_cnt; + uint32_t vec_idx = 0; + uint32_t pkt_len = pkts[pkt_idx]->pkt.pkt_len + vq->vhost_hlen; + uint16_t i, id; + + do { + /* + * As many data cores may want access to available + * buffers, they need to be reserved. 
+ */ + res_base_idx = vq->last_used_idx_res; + res_cur_idx = res_base_idx; + + do { + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + if (unlikely(res_cur_idx == avail_idx)) { + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") Failed " + "to get enough desc from " + "vring\n", + dev->device_fh); + return pkt_idx; + } else { + uint16_t wrapped_idx = + (res_cur_idx) & (vq->size - 1); + uint32_t idx = + vq->avail->ring[wrapped_idx]; + uint8_t next_desc; + + do { + next_desc = 0; + secure_len += vq->desc[idx].len; + if (vq->desc[idx].flags & + VRING_DESC_F_NEXT) { + idx = vq->desc[idx].next; + next_desc = 1; + } + } while (next_desc); + + res_cur_idx++; + } + } while (pkt_len > secure_len); + + /* vq->last_used_idx_res is atomically updated. */ + success = rte_atomic16_cmpset(&vq->last_used_idx_res, + res_base_idx, + res_cur_idx); + } while (success == 0); + + id = res_base_idx; + need_cnt = res_cur_idx - res_base_idx; + + for (i = 0; i < need_cnt; i++, id++) { + uint16_t wrapped_idx = id & (vq->size - 1); + uint32_t idx = vq->avail->ring[wrapped_idx]; + uint8_t next_desc; + do { + next_desc = 0; + vq->buf_vec[vec_idx].buf_addr = + vq->desc[idx].addr; + vq->buf_vec[vec_idx].buf_len = + vq->desc[idx].len; + vq->buf_vec[vec_idx].desc_idx = idx; + vec_idx++; + + if (vq->desc[idx].flags & VRING_DESC_F_NEXT) { + idx = vq->desc[idx].next; + next_desc = 1; + } + } while (next_desc); + } + + res_end_idx = res_cur_idx; + + entry_success = copy_from_mbuf_to_vring(dev, res_base_idx, + res_end_idx, pkts[pkt_idx]); + + rte_compiler_barrier(); + + /* + * Wait until it's our turn to add our buffer + * to the used ring. + */ + while (unlikely(vq->last_used_idx != res_base_idx)) + rte_pause(); + + *(volatile uint16_t *)&vq->used->idx += entry_success; + vq->last_used_idx = res_end_idx; + + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) + eventfd_write((int)vq->kickfd, 1); + } + + return count; +} + +uint32_t +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) +{ + if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))) + return virtio_dev_merge_rx(dev, queue_id, pkts, count); + else + return virtio_dev_rx(dev, queue_id, pkts, count); +} + + uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count) { - struct rte_mbuf *mbuf; + struct rte_mbuf *m, *prev; struct vhost_virtqueue *vq; struct vring_desc *desc; - uint64_t buff_addr = 0; + uint64_t vb_addr = 0; uint32_t head[VHOST_MAX_PKT_BURST]; uint32_t used_idx; uint32_t i; - uint16_t free_entries, packet_success = 0; + uint16_t free_entries, entry_success = 0; uint16_t avail_idx; if (unlikely(queue_id != VIRTIO_TXQ)) { @@ -223,7 +542,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_me if (vq->last_used_idx == avail_idx) return 0; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", __func__, + dev->device_fh); /* Prefetch available ring to retrieve head indexes. */ rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); @@ -231,11 +551,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_me /*get the number of free entries in the ring*/ free_entries = (avail_idx - vq->last_used_idx); - if (free_entries > count) - free_entries = count; + free_entries = RTE_MIN(free_entries, count); /* Limit to MAX_PKT_BURST. 
*/ - if (free_entries > VHOST_MAX_PKT_BURST) - free_entries = VHOST_MAX_PKT_BURST; + free_entries = RTE_MIN(free_entries, VHOST_MAX_PKT_BURST); LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries); /* Retrieve all of the head indexes first to avoid caching issues. */ @@ -243,56 +561,166 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_me head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; /* Prefetch descriptor index. */ - rte_prefetch0(&vq->desc[head[packet_success]]); + rte_prefetch0(&vq->desc[head[entry_success]]); rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); - while (packet_success < free_entries) { - desc = &vq->desc[head[packet_success]]; + while (entry_success < free_entries) { + uint32_t vb_avail, vb_offset; + uint32_t seg_avail, seg_offset; + uint32_t cpy_len; + uint32_t seg_num = 0; + struct rte_mbuf *cur; + uint8_t alloc_err = 0; + + desc = &vq->desc[head[entry_success]]; /* Discard first buffer as it is the virtio header */ desc = &vq->desc[desc->next]; /* Buffer address translation. */ - buff_addr = gpa_to_vva(dev, desc->addr); + vb_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ - rte_prefetch0((void*)(uintptr_t)buff_addr); + rte_prefetch0((void *)(uintptr_t)vb_addr); used_idx = vq->last_used_idx & (vq->size - 1); - if (packet_success < (free_entries - 1)) { + if (entry_success < (free_entries - 1)) { /* Prefetch descriptor index. */ - rte_prefetch0(&vq->desc[head[packet_success+1]]); + rte_prefetch0(&vq->desc[head[entry_success+1]]); rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]); } /* Update used index buffer information. */ - vq->used->ring[used_idx].id = head[packet_success]; + vq->used->ring[used_idx].id = head[entry_success]; vq->used->ring[used_idx].len = 0; - mbuf = rte_pktmbuf_alloc(mbuf_pool); - if (unlikely(mbuf == NULL)) { - RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n"); - return packet_success; + vb_offset = 0; + vb_avail = desc->len; + + seg_avail = 0; + /* Allocate an mbuf and populate the structure. */ + m = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(m == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + return entry_success; } - mbuf->pkt.data_len = desc->len; - mbuf->pkt.pkt_len = mbuf->pkt.data_len; + seg_offset = 0; + seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; + cpy_len = RTE_MIN(vb_avail, seg_avail); + + VHOST_PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0); + + + seg_num++; + cur = m; + prev = m; + while (cpy_len != 0) { + rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset), + (void *)((uintptr_t)(vb_addr + vb_offset)), + cpy_len); + + seg_offset += cpy_len; + vb_offset += cpy_len; + vb_avail -= cpy_len; + seg_avail -= cpy_len; + + if (vb_avail != 0) { + /* + * The segment reachs to its end, + * while the virtio buffer in TX vring has + * more data to be copied. + */ + cur->pkt.data_len = seg_offset; + m->pkt.pkt_len += seg_offset; + /* Allocate mbuf and populate the structure. */ + cur = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(cur == NULL)) { + RTE_LOG(ERR, VHOST_DATA, "Failed to " + "allocate memory for mbuf.\n"); + rte_pktmbuf_free(m); + alloc_err = 1; + break; + } + + seg_num++; + prev->pkt.next = cur; + prev = cur; + seg_offset = 0; + seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; + } else { + if (desc->flags & VRING_DESC_F_NEXT) { + /* + * There are more virtio buffers in + * same vring entry need to be copied. 
+ */ + if (seg_avail == 0) { + /* + * The current segment hasn't + * room to accomodate more + * data. + */ + cur->pkt.data_len = seg_offset; + m->pkt.pkt_len += seg_offset; + /* + * Allocate an mbuf and + * populate the structure. + */ + cur = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(cur == NULL)) { + RTE_LOG(ERR, + VHOST_DATA, + "Failed to " + "allocate memory " + "for mbuf\n"); + rte_pktmbuf_free(m); + alloc_err = 1; + break; + } + seg_num++; + prev->pkt.next = cur; + prev = cur; + seg_offset = 0; + seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; + } + + desc = &vq->desc[desc->next]; + + /* Buffer address translation. */ + vb_addr = gpa_to_vva(dev, desc->addr); + /* Prefetch buffer address. */ + rte_prefetch0((void *)(uintptr_t)vb_addr); + vb_offset = 0; + vb_avail = desc->len; + + VHOST_PRINT_PACKET(dev, (uintptr_t)vb_addr, + desc->len, 0); + } else { + /* The whole packet completes. */ + cur->pkt.data_len = seg_offset; + m->pkt.pkt_len += seg_offset; + vb_avail = 0; + } + } - rte_memcpy((void *) mbuf->pkt.data, - (const void *) buff_addr, mbuf->pkt.data_len); + cpy_len = RTE_MIN(vb_avail, seg_avail); + } - pkts[packet_success] = mbuf; + if (unlikely(alloc_err == 1)) + break; - VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0); + m->pkt.nb_segs = seg_num; + pkts[entry_success] = m; vq->last_used_idx++; - packet_success++; + entry_success++; } rte_compiler_barrier(); - vq->used->idx += packet_success; + vq->used->idx += entry_success; /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) eventfd_write((int)vq->kickfd, 1); + return entry_success; - return packet_success; } -- 1.8.1.4
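
For readers new to this API, below is a minimal usage sketch (not part of the
patch) showing how an application might drive the two entry points touched
here. Only the rte_vhost_enqueue_burst()/rte_vhost_dequeue_burst() signatures
and the VIRTIO_RXQ/VIRTIO_TXQ queue ids are taken from this patch; the burst
size, the mbuf pool and the way the virtio_net device handle is obtained are
assumptions made purely for illustration.

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_virtio_net.h>

#define SKETCH_BURST 32	/* arbitrary burst size, not from the patch */

/*
 * Poll one vhost device: drain what the guest transmitted and echo it
 * back into the guest RX ring. rte_vhost_enqueue_burst() internally
 * dispatches to virtio_dev_merge_rx() or virtio_dev_rx() depending on
 * whether VIRTIO_NET_F_MRG_RXBUF was negotiated.
 */
static void
vhost_echo_poll(struct virtio_net *dev, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[SKETCH_BURST];
	uint32_t nb_rx, i;

	/* Guest TX ring -> host mbufs. */
	nb_rx = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool,
			pkts, SKETCH_BURST);
	if (nb_rx == 0)
		return;

	/* Host mbufs -> guest RX ring (data is copied into the vring). */
	rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts, nb_rx);

	/* The copy is complete after enqueue, so the mbufs can be freed. */
	for (i = 0; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]);
}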