From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from mga03.intel.com (mga03.intel.com [134.134.136.65])
	by dpdk.org (Postfix) with ESMTP id 38F805AB0
	for ; Thu, 18 Aug 2016 15:41:09 +0200 (CEST)
Received: from fmsmga003.fm.intel.com ([10.253.24.29])
	by orsmga103.jf.intel.com with ESMTP; 18 Aug 2016 06:41:10 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.28,539,1464678000"; d="scan'208";a="750443058"
Received: from unknown (HELO dpdk5.sh.intel.com) ([10.239.129.118])
	by FMSMGA003.fm.intel.com with ESMTP; 18 Aug 2016 06:41:08 -0700
From: Zhihong Wang
To: dev@dpdk.org
Cc: maxime.coquelin@redhat.com, yuanhan.liu@linux.intel.com, Zhihong Wang
Date: Thu, 18 Aug 2016 02:33:06 -0400
Message-Id: <1471501991-37257-2-git-send-email-zhihong.wang@intel.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1471501991-37257-1-git-send-email-zhihong.wang@intel.com>
References: <1471319402-112998-1-git-send-email-zhihong.wang@intel.com>
	<1471501991-37257-1-git-send-email-zhihong.wang@intel.com>
Subject: [dpdk-dev] [PATCH v2 1/6] vhost: rewrite enqueue
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK
X-List-Received-Date: Thu, 18 Aug 2016 13:41:11 -0000

This patch reimplements the vhost enqueue logic from scratch as a single
function, aiming for high performance and better maintainability.

Signed-off-by: Zhihong Wang
---
 lib/librte_vhost/vhost_rxtx.c | 212 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 205 insertions(+), 7 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..8e6d782 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
 	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }
 
-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
 	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -533,19 +533,217 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	return pkt_idx;
 }
 
+static inline uint32_t __attribute__((always_inline))
+loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left)
+{
+	if (pkt_left == 0 || avail_idx == vq->last_used_idx)
+		return 1;
+
+	return 0;
+}
+
+static inline uint32_t __attribute__((always_inline))
+enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint16_t avail_idx, struct rte_mbuf *mbuf,
+		uint32_t is_mrg_rxbuf)
+{
+	struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
+	struct vring_desc *desc;
+	uint64_t desc_host_write_addr = 0;
+	uint32_t desc_chain_head = 0;
+	uint32_t desc_chain_len = 0;
+	uint32_t desc_current = 0;
+	uint32_t desc_write_offset = 0;
+	uint32_t mbuf_len = 0;
+	uint32_t mbuf_len_left = 0;
+	uint32_t copy_len = 0;
+	uint32_t extra_buffers = 0;
+	uint32_t used_idx_round = 0;
+
+	/* start with the first mbuf of the packet */
+	mbuf_len = rte_pktmbuf_data_len(mbuf);
+	mbuf_len_left = mbuf_len;
+
+	/* get the current desc */
+	desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
+	desc_chain_head = desc_current;
+	desc = &vq->desc[desc_current];
+	desc_host_write_addr = gpa_to_vva(dev, desc->addr);
+	if (unlikely(!desc_host_write_addr))
+		goto error;
+
+	/* handle virtio header */
+	virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
+		(uintptr_t)desc_host_write_addr;
+	memset((void *)(uintptr_t)&(virtio_hdr->hdr),
+		0, dev->vhost_hlen);
+	virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
+	vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+	desc_write_offset = dev->vhost_hlen;
+	desc_chain_len = desc_write_offset;
+	desc_host_write_addr += desc_write_offset;
+	if (is_mrg_rxbuf)
+		virtio_hdr->num_buffers = 1;
+
+	/* start copy from mbuf to desc */
+	while (1) {
+		/* get the next mbuf if the current done */
+		if (!mbuf_len_left) {
+			if (mbuf->next) {
+				mbuf = mbuf->next;
+				mbuf_len = rte_pktmbuf_data_len(mbuf);
+				mbuf_len_left = mbuf_len;
+			} else
+				break;
+		}
+
+		/* get the next desc if the current done */
+		if (desc->len <= desc_write_offset) {
+			if (desc->flags & VRING_DESC_F_NEXT) {
+				/* go on with the current desc chain */
+				desc_write_offset = 0;
+				desc_current = desc->next;
+				desc = &vq->desc[desc_current];
+				desc_host_write_addr =
+					gpa_to_vva(dev, desc->addr);
+				if (unlikely(!desc_host_write_addr))
+					goto rollback;
+			} else if (is_mrg_rxbuf) {
+				/* start with the next desc chain */
+				used_idx_round = vq->last_used_idx
+					& (vq->size - 1);
+				vq->used->ring[used_idx_round].id =
+					desc_chain_head;
+				vq->used->ring[used_idx_round].len =
+					desc_chain_len;
+				vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+						ring[used_idx_round]),
+					sizeof(vq->used->ring[
+						used_idx_round]));
+				vq->last_used_idx++;
+				extra_buffers++;
+				virtio_hdr->num_buffers++;
+				if (avail_idx == vq->last_used_idx)
+					goto rollback;
+
+				desc_current =
+					vq->avail->ring[(vq->last_used_idx) &
+					(vq->size - 1)];
+				desc_chain_head = desc_current;
+				desc = &vq->desc[desc_current];
+				desc_host_write_addr =
+					gpa_to_vva(dev, desc->addr);
+				if (unlikely(!desc_host_write_addr))
+					goto rollback;
+
+				desc_chain_len = 0;
+				desc_write_offset = 0;
+			} else
+				goto rollback;
+		}
+
+		/* copy mbuf data */
+		copy_len = RTE_MIN(desc->len - desc_write_offset,
+				mbuf_len_left);
+		rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
+			rte_pktmbuf_mtod_offset(mbuf, void *,
+				mbuf_len - mbuf_len_left),
+			copy_len);
+		vhost_log_write(dev, desc->addr + desc_write_offset,
+			copy_len);
+		mbuf_len_left -= copy_len;
+		desc_write_offset += copy_len;
+		desc_host_write_addr += copy_len;
+		desc_chain_len += copy_len;
+	}
+
+	used_idx_round = vq->last_used_idx & (vq->size - 1);
+	vq->used->ring[used_idx_round].id = desc_chain_head;
+	vq->used->ring[used_idx_round].len = desc_chain_len;
+	vhost_log_used_vring(dev, vq,
+		offsetof(struct vring_used, ring[used_idx_round]),
+		sizeof(vq->used->ring[used_idx_round]));
+	vq->last_used_idx++;
+
+	return 0;
+
+rollback:
+	/* rollback on any error if last_used_idx update on-the-fly */
+	if (is_mrg_rxbuf)
+		vq->last_used_idx -= extra_buffers;
+
+error:
+	return 1;
+}
+
+static inline void __attribute__((always_inline))
+notify_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	rte_smp_wmb();
+	vq->used->idx = vq->last_used_idx;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+		sizeof(vq->used->idx));
+	rte_mb();
+	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+		&& (vq->callfd >= 0))
+		eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
 uint16_t
 rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint16_t count)
 {
-	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev;
+	uint32_t pkt_idx = 0;
+	uint32_t pkt_left = 0;
+	uint32_t pkt_sent = 0;
+	uint32_t is_mrg_rxbuf = 0;
+	uint16_t avail_idx = 0;
+
+	/* precheck */
+	if (unlikely(count == 0))
+		return 0;
 
-	if (!dev)
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
 		return 0;
 
-	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb)))
+		return 0;
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(!vq->enabled))
+		return 0;
+
+	if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+		is_mrg_rxbuf = 1;
+
+	/* start enqueuing packets 1 by 1 */
+	pkt_idx = 0;
+	pkt_left = count;
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	while (1) {
+		if (loop_check(vq, avail_idx, pkt_left))
+			break;
+
+		if (enqueue_packet(dev, vq, avail_idx, pkts[pkt_idx],
+				is_mrg_rxbuf))
+			break;
+
+		pkt_idx++;
+		pkt_sent++;
+		pkt_left--;
+	}
+
+	/* update used idx and kick the guest if necessary */
+	if (pkt_sent)
+		notify_guest(dev, vq);
+
+	return pkt_sent;
 }
 
 static void
-- 
2.7.4
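
As a reading aid for the archive, below is a minimal caller-side sketch (not
part of the patch) of how the reworked rte_vhost_enqueue_burst() is typically
driven by a host switching application, in the spirit of DPDK's vhost example.
Only the rte_vhost_enqueue_burst() signature and its "packets enqueued" return
value come from the patch above; the header name, the VIRTIO_RXQ queue index,
the helper name, and the drop handling are assumptions for illustration.

#include <rte_mbuf.h>
#include <rte_virtio_net.h>

/*
 * Caller-side sketch: copy a burst of host mbufs into the guest's RX
 * virtqueue.  rte_vhost_enqueue_burst() copies packet data into guest
 * buffers, so the caller keeps ownership of every mbuf whether or not
 * it was enqueued.  Returns the number of packets that did not fit.
 */
static uint16_t
flush_burst_to_guest(int vid, struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t enqueued;
	uint16_t i;

	/* Queue 0 (VIRTIO_RXQ) is the virtqueue the guest receives on. */
	enqueued = rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb_pkts);

	/*
	 * Free every mbuf; packets beyond 'enqueued' are treated as drops
	 * here (the avail ring was full or the guest stopped consuming).
	 * A real forwarder might instead retry them on the next poll.
	 */
	for (i = 0; i < nb_pkts; i++)
		rte_pktmbuf_free(pkts[i]);

	return nb_pkts - enqueued;
}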