From: Jiayu Hu <jiayu.hu@intel.com>
To: dev@dpdk.org
Cc: maxime.coquelin@redhat.com, xiaolong.ye@intel.com, zhihong.wang@intel.com,
	Jiayu Hu <jiayu.hu@intel.com>
Date: Tue, 17 Mar 2020 05:21:23 -0400
Message-Id: <1584436885-18651-3-git-send-email-jiayu.hu@intel.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1584436885-18651-1-git-send-email-jiayu.hu@intel.com>
References: <1584436885-18651-1-git-send-email-jiayu.hu@intel.com>
Subject: [dpdk-dev] [PATCH 2/4] net/vhost: setup vrings for DMA-accelerated datapath

This patch gets the vrings' addresses and sets up GPA (guest physical
address) to HPA (host physical address) mappings, which the vhost-user
PMD uses to offload large data movements from the CPU to DMA engines.
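For illustration, below is a minimal sketch of how the enqueue path added
later in this series could consume these mappings. It is not part of this
patch, and the submit_dma_copy() helper is a hypothetical name; it assumes
the I/OAT rawdev API (rte_ioat_enqueue_copy() from <rte_ioat_rawdev.h>),
the only DMA engine type this series supports.

#include <rte_ioat_rawdev.h>

#include "virtio_net.h" /* pmd_internal, dma_vring, gpa_to_hpa() */

/* Hypothetical helper: offload one packet copy to the I/OAT device
 * bound to this vring. The destination comes from a descriptor as a
 * guest physical address; gpa_to_hpa() translates it with the mapping
 * table that vhost_dma_setup() builds.
 */
static int
submit_dma_copy(struct pmd_internal *dev, struct dma_vring *dma_vr,
		rte_iova_t src_hpa, uint64_t dst_gpa, uint32_t len)
{
	rte_iova_t dst_hpa = gpa_to_hpa(dev, dst_gpa, len);

	if (dst_hpa == 0)
		return -1; /* no contiguous mapping; use a CPU copy */

	/* rte_ioat_enqueue_copy() returns the number of jobs enqueued:
	 * 1 on success, 0 on failure.
	 */
	if (rte_ioat_enqueue_copy(dma_vr->dev_id, src_hpa, dst_hpa,
				  len, 0, 0, 0) != 1)
		return -1;

	dma_vr->nr_inflight++;
	return 0;
}

Note that gpa_to_hpa() returns 0 when no single guest page covers the
whole buffer, e.g. when a packet spans two host pages that are not
physically contiguous, so the caller must keep a CPU-copy fallback.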
Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 drivers/Makefile                  |   2 +-
 drivers/net/vhost/Makefile        |   4 +-
 drivers/net/vhost/internal.h      | 141 ++++++++++++++++++++++++++++++++
 drivers/net/vhost/meson.build     |   3 +-
 drivers/net/vhost/rte_eth_vhost.c |  56 +------------
 drivers/net/vhost/virtio_net.c    | 119 +++++++++++++++++++++++++++
 drivers/net/vhost/virtio_net.h    | 168 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 438 insertions(+), 55 deletions(-)
 create mode 100644 drivers/net/vhost/internal.h
 create mode 100644 drivers/net/vhost/virtio_net.c
 create mode 100644 drivers/net/vhost/virtio_net.h

diff --git a/drivers/Makefile b/drivers/Makefile
index c70bdf9..8555ddd 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -9,7 +9,7 @@ DEPDIRS-bus := common
 DIRS-y += mempool
 DEPDIRS-mempool := common bus
 DIRS-y += net
-DEPDIRS-net := common bus mempool
+DEPDIRS-net := common bus mempool raw
 DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += baseband
 DEPDIRS-baseband := common bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += crypto
diff --git a/drivers/net/vhost/Makefile b/drivers/net/vhost/Makefile
index 0461e29..19cae52 100644
--- a/drivers/net/vhost/Makefile
+++ b/drivers/net/vhost/Makefile
@@ -15,13 +15,15 @@ LDLIBS += -lrte_bus_vdev
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -fno-strict-aliasing
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 EXPORT_MAP := rte_pmd_vhost_version.map
 
 #
 # all source are stored in SRCS-y
 #
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += rte_eth_vhost.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += rte_eth_vhost.c virtio_net.c
 
 #
 # Export include files
diff --git a/drivers/net/vhost/internal.h b/drivers/net/vhost/internal.h
new file mode 100644
index 0000000..7588fdf
--- /dev/null
+++ b/drivers/net/vhost/internal.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+#ifndef _INTERNAL_H_
+#define _INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+#include <rte_log.h>
+
+extern int vhost_logtype;
+
+#define VHOST_LOG(level, ...) \
+	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
+
+enum vhost_xstats_pkts {
+	VHOST_UNDERSIZE_PKT = 0,
+	VHOST_64_PKT,
+	VHOST_65_TO_127_PKT,
+	VHOST_128_TO_255_PKT,
+	VHOST_256_TO_511_PKT,
+	VHOST_512_TO_1023_PKT,
+	VHOST_1024_TO_1522_PKT,
+	VHOST_1523_TO_MAX_PKT,
+	VHOST_BROADCAST_PKT,
+	VHOST_MULTICAST_PKT,
+	VHOST_UNICAST_PKT,
+	VHOST_ERRORS_PKT,
+	VHOST_ERRORS_FRAGMENTED,
+	VHOST_ERRORS_JABBER,
+	VHOST_UNKNOWN_PROTOCOL,
+	VHOST_XSTATS_MAX,
+};
+
+struct vhost_stats {
+	uint64_t pkts;
+	uint64_t bytes;
+	uint64_t missed_pkts;
+	uint64_t xstats[VHOST_XSTATS_MAX];
+};
+
+struct batch_copy_elem {
+	void *dst;
+	void *src;
+	uint32_t len;
+};
+
+struct guest_page {
+	uint64_t guest_phys_addr;
+	uint64_t host_phys_addr;
+	uint64_t size;
+};
+
+struct dma_vring {
+	struct rte_vhost_vring vr;
+
+	uint16_t last_avail_idx;
+	uint16_t last_used_idx;
+
+	/* the last used index that the frontend can consume */
+	uint16_t copy_done_used;
+
+	uint16_t signalled_used;
+	bool signalled_used_valid;
+
+	struct vring_used_elem *shadow_used_split;
+	uint16_t shadow_used_idx;
+
+	struct batch_copy_elem *batch_copy_elems;
+	uint16_t batch_copy_nb_elems;
+
+	bool dma_enabled;
+	/**
+	 * DMA ID. Currently, we only support I/OAT,
+	 * so it's the I/OAT rawdev ID.
+	 */
+	uint16_t dev_id;
+	/* PCI address of the DMA device */
+	struct rte_pci_addr dma_addr;
+	/**
+	 * the number of copy jobs that are submitted to the DMA
+	 * but may not be completed.
+	 */
+	uint64_t nr_inflight;
+	int nr_batching;
+
+	/**
+	 * host physical address of used ring index,
+	 * used by the DMA.
+	 */
+	phys_addr_t used_idx_hpa;
+};
+
+struct vhost_queue {
+	int vid;
+	rte_atomic32_t allow_queuing;
+	rte_atomic32_t while_queuing;
+	struct pmd_internal *internal;
+	struct rte_mempool *mb_pool;
+	uint16_t port;
+	uint16_t virtqueue_id;
+	struct vhost_stats stats;
+	struct dma_vring *dma_vring;
+};
+
+struct pmd_internal {
+	rte_atomic32_t dev_attached;
+	char *iface_name;
+	uint64_t flags;
+	uint64_t disable_flags;
+	uint16_t max_queues;
+	int vid;
+	rte_atomic32_t started;
+	uint8_t vlan_strip;
+
+	/* guest's memory regions */
+	struct rte_vhost_memory *mem;
+	/* guest and host physical address mapping table */
+	struct guest_page *guest_pages;
+	uint32_t nr_guest_pages;
+	uint32_t max_guest_pages;
+	/* guest's vrings */
+	struct dma_vring dma_vrings[RTE_MAX_QUEUES_PER_PORT * 2];
+	uint16_t nr_vrings;
+	/* negotiated features */
+	uint64_t features;
+	size_t hdr_len;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INTERNAL_H_ */
diff --git a/drivers/net/vhost/meson.build b/drivers/net/vhost/meson.build
index d793086..b308dcb 100644
--- a/drivers/net/vhost/meson.build
+++ b/drivers/net/vhost/meson.build
@@ -3,6 +3,7 @@
 build = dpdk_conf.has('RTE_LIBRTE_VHOST')
 reason = 'missing dependency, DPDK vhost library'
-sources = files('rte_eth_vhost.c')
+sources = files('rte_eth_vhost.c',
+		'virtio_net.c')
 install_headers('rte_eth_vhost.h')
 deps += 'vhost'
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 458ed58..b5c927c 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -16,12 +16,10 @@
 #include <rte_vhost.h>
 #include <rte_spinlock.h>
 
+#include "internal.h"
 #include "rte_eth_vhost.h"
 
-static int vhost_logtype;
-
-#define VHOST_LOG(level, ...)	\
-	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
+int vhost_logtype;
 
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
@@ -56,54 +54,6 @@ static struct rte_ether_addr base_eth_addr = {
 	}
 };
 
-enum vhost_xstats_pkts {
-	VHOST_UNDERSIZE_PKT = 0,
-	VHOST_64_PKT,
-	VHOST_65_TO_127_PKT,
-	VHOST_128_TO_255_PKT,
-	VHOST_256_TO_511_PKT,
-	VHOST_512_TO_1023_PKT,
-	VHOST_1024_TO_1522_PKT,
-	VHOST_1523_TO_MAX_PKT,
-	VHOST_BROADCAST_PKT,
-	VHOST_MULTICAST_PKT,
-	VHOST_UNICAST_PKT,
-	VHOST_ERRORS_PKT,
-	VHOST_ERRORS_FRAGMENTED,
-	VHOST_ERRORS_JABBER,
-	VHOST_UNKNOWN_PROTOCOL,
-	VHOST_XSTATS_MAX,
-};
-
-struct vhost_stats {
-	uint64_t pkts;
-	uint64_t bytes;
-	uint64_t missed_pkts;
-	uint64_t xstats[VHOST_XSTATS_MAX];
-};
-
-struct vhost_queue {
-	int vid;
-	rte_atomic32_t allow_queuing;
-	rte_atomic32_t while_queuing;
-	struct pmd_internal *internal;
-	struct rte_mempool *mb_pool;
-	uint16_t port;
-	uint16_t virtqueue_id;
-	struct vhost_stats stats;
-};
-
-struct pmd_internal {
-	rte_atomic32_t dev_attached;
-	char *iface_name;
-	uint64_t flags;
-	uint64_t disable_flags;
-	uint16_t max_queues;
-	int vid;
-	rte_atomic32_t started;
-	uint8_t vlan_strip;
-};
-
 struct internal_list {
 	TAILQ_ENTRY(internal_list) next;
 	struct rte_eth_dev *eth_dev;
@@ -698,6 +648,7 @@ queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
 		vq->vid = internal->vid;
 		vq->internal = internal;
 		vq->port = eth_dev->data->port_id;
+		vq->dma_vring = &internal->dma_vrings[vq->virtqueue_id];
 	}
 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
 		vq = eth_dev->data->tx_queues[i];
@@ -706,6 +657,7 @@ queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
 		vq->vid = internal->vid;
 		vq->internal = internal;
 		vq->port = eth_dev->data->port_id;
+		vq->dma_vring = &internal->dma_vrings[vq->virtqueue_id];
 	}
 }
diff --git a/drivers/net/vhost/virtio_net.c b/drivers/net/vhost/virtio_net.c
new file mode 100644
index 0000000..11591c0
--- /dev/null
+++ b/drivers/net/vhost/virtio_net.c
@@ -0,0 +1,119 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+
+#include "virtio_net.h"
+
+int
+vhost_dma_setup(struct pmd_internal *dev)
+{
+	struct dma_vring *dma_vr;
+	int vid = dev->vid;
+	int ret;
+	uint16_t i, j, size;
+
+	rte_vhost_get_negotiated_features(vid, &dev->features);
+
+	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+		dev->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		dev->hdr_len = sizeof(struct virtio_net_hdr);
+
+	dev->nr_vrings = rte_vhost_get_vring_num(vid);
+
+	if (rte_vhost_get_mem_table(vid, &dev->mem) < 0) {
+		VHOST_LOG(ERR, "Failed to get guest memory regions\n");
+		return -1;
+	}
+
+	/* set up gpa and hpa mappings */
+	if (setup_guest_pages(dev, dev->mem) < 0) {
+		VHOST_LOG(ERR, "Failed to set up hpa and gpa mappings\n");
+		free(dev->mem);
+		return -1;
+	}
+
+	for (i = 0; i < dev->nr_vrings; i++) {
+		dma_vr = &dev->dma_vrings[i];
+
+		ret = rte_vhost_get_vring_base(vid, i, &dma_vr->last_avail_idx,
+					       &dma_vr->last_used_idx);
+		if (ret < 0) {
+			VHOST_LOG(ERR, "Failed to get vring index.\n");
+			goto err;
+		}
+
+		ret = rte_vhost_get_vhost_vring(vid, i, &dma_vr->vr);
+		if (ret < 0) {
+			VHOST_LOG(ERR, "Failed to get vring address.\n");
+			goto err;
+		}
+
+		size = dma_vr->vr.size;
+		dma_vr->shadow_used_split =
+			rte_malloc(NULL, size * sizeof(struct vring_used_elem),
+				   RTE_CACHE_LINE_SIZE);
+		if (dma_vr->shadow_used_split == NULL)
+			goto err;
+
+		dma_vr->batch_copy_elems =
+			rte_malloc(NULL, size * sizeof(struct batch_copy_elem),
+				   RTE_CACHE_LINE_SIZE);
+		if (dma_vr->batch_copy_elems == NULL)
+			goto err;
+
+		/* get HPA of used ring's index */
+		dma_vr->used_idx_hpa =
+			rte_mem_virt2iova(&dma_vr->vr.used->idx);
+
+		dma_vr->copy_done_used = dma_vr->last_used_idx;
+		dma_vr->signalled_used = dma_vr->last_used_idx;
+		dma_vr->signalled_used_valid = false;
+		dma_vr->shadow_used_idx = 0;
+		dma_vr->batch_copy_nb_elems = 0;
+	}
+
+	return 0;
+
+err:
+	for (j = 0; j <= i; j++) {
+		dma_vr = &dev->dma_vrings[j];
+		rte_free(dma_vr->shadow_used_split);
+		rte_free(dma_vr->batch_copy_elems);
+		dma_vr->shadow_used_split = NULL;
+		dma_vr->batch_copy_elems = NULL;
+		dma_vr->used_idx_hpa = 0;
+	}
+
+	free(dev->mem);
+	dev->mem = NULL;
+	free(dev->guest_pages);
+	dev->guest_pages = NULL;
+
+	return -1;
+}
+
+void
+vhost_dma_remove(struct pmd_internal *dev)
+{
+	struct dma_vring *dma_vr;
+	uint16_t i;
+
+	for (i = 0; i < dev->nr_vrings; i++) {
+		dma_vr = &dev->dma_vrings[i];
+		rte_free(dma_vr->shadow_used_split);
+		rte_free(dma_vr->batch_copy_elems);
+		dma_vr->shadow_used_split = NULL;
+		dma_vr->batch_copy_elems = NULL;
+		dma_vr->signalled_used_valid = false;
+		dma_vr->used_idx_hpa = 0;
+	}
+
+	free(dev->mem);
+	dev->mem = NULL;
+	free(dev->guest_pages);
+	dev->guest_pages = NULL;
+}
diff --git a/drivers/net/vhost/virtio_net.h b/drivers/net/vhost/virtio_net.h
new file mode 100644
index 0000000..7f99f1d
--- /dev/null
+++ b/drivers/net/vhost/virtio_net.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+#ifndef _VIRTIO_NET_H_
+#define _VIRTIO_NET_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "internal.h"
+
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat stat;
+	int ret;
+
+	ret = fstat(fd, &stat);
+	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
+}
+
+static __rte_always_inline int
+add_one_guest_page(struct pmd_internal *dev, uint64_t guest_phys_addr,
+		   uint64_t host_phys_addr, uint64_t size)
+{
+	struct guest_page *page, *last_page;
+	struct guest_page *old_pages;
+
+	if (dev->nr_guest_pages == dev->max_guest_pages) {
+		dev->max_guest_pages *= 2;
+		old_pages = dev->guest_pages;
+		dev->guest_pages = realloc(dev->guest_pages,
+					   dev->max_guest_pages *
+					   sizeof(*page));
+		if (!dev->guest_pages) {
+			VHOST_LOG(ERR, "Cannot realloc guest_pages\n");
+			free(old_pages);
+			return -1;
+		}
+	}
+
+	if (dev->nr_guest_pages > 0) {
+		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
+		/* merge if the two pages are contiguous */
+		if (host_phys_addr == last_page->host_phys_addr +
+				      last_page->size) {
+			last_page->size += size;
+			return 0;
+		}
+	}
+
+	page = &dev->guest_pages[dev->nr_guest_pages++];
+	page->guest_phys_addr = guest_phys_addr;
+	page->host_phys_addr = host_phys_addr;
+	page->size = size;
+
+	return 0;
+}
+
+static __rte_always_inline int
+add_guest_page(struct pmd_internal *dev, struct rte_vhost_mem_region *reg)
+{
+	uint64_t reg_size = reg->size;
+	uint64_t host_user_addr = reg->host_user_addr;
+	uint64_t guest_phys_addr = reg->guest_phys_addr;
+	uint64_t host_phys_addr;
+	uint64_t size, page_size;
+
+	page_size = get_blk_size(reg->fd);
+	if (page_size == (uint64_t)-1) {
+		VHOST_LOG(ERR, "Cannot get hugepage size through fstat\n");
+		return -1;
+	}
+
+	host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
+	size = page_size - (guest_phys_addr & (page_size - 1));
+	size = RTE_MIN(size, reg_size);
+
+	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+		return -1;
+
+	host_user_addr += size;
+	guest_phys_addr += size;
+	reg_size -= size;
+
+	while (reg_size > 0) {
+		size = RTE_MIN(reg_size, page_size);
+		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
+						   host_user_addr);
+		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+				       size) < 0)
+			return -1;
+
+		host_user_addr += size;
+		guest_phys_addr += size;
+		reg_size -= size;
+	}
+
+	return 0;
+}
+
+static __rte_always_inline int
+setup_guest_pages(struct pmd_internal *dev, struct rte_vhost_memory *mem)
+{
+	uint32_t nr_regions = mem->nregions;
+	uint32_t i;
+
+	dev->nr_guest_pages = 0;
+	dev->max_guest_pages = 8;
+
+	dev->guest_pages = malloc(dev->max_guest_pages *
+				  sizeof(struct guest_page));
+	if (dev->guest_pages == NULL) {
+		VHOST_LOG(ERR, "(%d) failed to allocate memory "
+			  "for dev->guest_pages\n", dev->vid);
+		return -1;
+	}
+
+	for (i = 0; i < nr_regions; i++) {
+		if (add_guest_page(dev, &mem->regions[i]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+static __rte_always_inline rte_iova_t
+gpa_to_hpa(struct pmd_internal *dev, uint64_t gpa, uint64_t size)
+{
+	uint32_t i;
+	struct guest_page *page;
+
+	for (i = 0; i < dev->nr_guest_pages; i++) {
+		page = &dev->guest_pages[i];
+
+		if (gpa >= page->guest_phys_addr &&
+		    gpa + size < page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+			       page->host_phys_addr;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * This function gets the frontend's memory and vring information,
+ * and sets up the data structures needed by the enqueue and dequeue
+ * operations.
+ */
+int vhost_dma_setup(struct pmd_internal *dev);
+
+/**
+ * This function destroys the frontend's information and frees the
+ * data structures used by the enqueue and dequeue operations.
+ */
+void vhost_dma_remove(struct pmd_internal *dev);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIRTIO_NET_H_ */
-- 
2.7.4