From mboxrd@z Thu Jan 1 00:00:00 1970
From: Stefan Hajnoczi
To: dev@dpdk.org
Cc: maxime.coquelin@redhat.com, Yuanhan Liu, wei.w.wang@intel.com,
 mst@redhat.com, zhiyong.yang@intel.com, jasowang@redhat.com, Stefan Hajnoczi
Date: Fri, 19 Jan 2018 13:44:33 +0000
Message-Id: <20180119134444.24927-14-stefanha@redhat.com>
In-Reply-To: <20180119134444.24927-1-stefanha@redhat.com>
References: <20180119134444.24927-1-stefanha@redhat.com>
Subject: [dpdk-dev] [RFC 13/24] vhost: move mmap/munmap to AF_UNIX transport
List-Id: DPDK patches and discussions

How mem table regions are mapped is transport-specific, so move the mmap
code into trans_af_unix.c. The new .map_mem_regions()/.unmap_mem_regions()
interfaces allow transports to perform the mapping and unmapping.

Drop the "mmap align:" debug output because the alignment is no longer
available from vhost_user.c.

Signed-off-by: Stefan Hajnoczi
---
 lib/librte_vhost/vhost.h         | 17 +++++++
 lib/librte_vhost/vhost_user.h    |  3 ++
 lib/librte_vhost/trans_af_unix.c | 78 +++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost_user.c    | 95 ++++++++++------------------------------
 4 files changed, 121 insertions(+), 72 deletions(-)
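
Note (illustrative sketch, not part of the diff below): a second transport
would implement the same two callbacks and register them in its
vhost_transport_ops, mirroring what trans_af_unix.c does in this patch. A
minimal sketch with hypothetical my_trans_* names, assuming only the vhost.h
declarations added here:

	#include "vhost.h"

	static int
	my_trans_map_mem_regions(struct virtio_net *dev)
	{
		uint32_t i;

		for (i = 0; i < dev->mem->nregions; i++) {
			struct rte_vhost_mem_region *reg = &dev->mem->regions[i];

			/* Map the region however this transport receives
			 * guest memory, then publish the mapping for the
			 * rest of the library:
			 *
			 *   reg->mmap_addr      = <mapping base>;
			 *   reg->mmap_size      = <mapping length>;
			 *   reg->host_user_addr = <host addr of guest_user_addr>;
			 *
			 * Return -1 if any region cannot be mapped.
			 */
			(void)reg;
		}
		return 0;
	}

	static void
	my_trans_unmap_mem_regions(struct virtio_net *dev)
	{
		/* Undo map_mem_regions(): unmap regions, close fds, etc. */
		(void)dev;
	}

	static const struct vhost_transport_ops my_trans_ops = {
		/* other callbacks omitted for brevity */
		.map_mem_regions = my_trans_map_mem_regions,
		.unmap_mem_regions = my_trans_unmap_mem_regions,
	};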
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 60e4d10bd..a50b802e7 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -315,6 +315,23 @@ struct vhost_transport_ops {
 	 */
 	int (*set_slave_req_fd)(struct virtio_net *dev,
 				struct VhostUserMsg *msg);
+
+	/**
+	 * Map memory table regions in dev->mem->regions[].
+	 *
+	 * @param dev
+	 *  vhost device
+	 */
+	int (*map_mem_regions)(struct virtio_net *dev);
+
+	/**
+	 * Unmap memory table regions in dev->mem->regions[] and free any
+	 * resources, such as file descriptors.
+	 *
+	 * @param dev
+	 *  vhost device
+	 */
+	void (*unmap_mem_regions)(struct virtio_net *dev);
 };
 
 /** The traditional AF_UNIX vhost-user protocol transport. */
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index dec658dff..4181f34c9 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -110,6 +110,9 @@ typedef struct VhostUserMsg {
 
 
 /* vhost_user.c */
+void vhost_add_guest_pages(struct virtio_net *dev,
+			   struct rte_vhost_mem_region *reg,
+			   uint64_t page_size);
 int vhost_user_msg_handler(int vid, const struct VhostUserMsg *msg);
 int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
 
diff --git a/lib/librte_vhost/trans_af_unix.c b/lib/librte_vhost/trans_af_unix.c
index 7128e121e..d3a5519b7 100644
--- a/lib/librte_vhost/trans_af_unix.c
+++ b/lib/librte_vhost/trans_af_unix.c
@@ -34,6 +34,8 @@
  */
 
 #include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
 #include <sys/socket.h>
 #include <sys/un.h>
 
@@ -703,6 +705,80 @@ af_unix_vring_call(struct virtio_net *dev __rte_unused,
 	return 0;
 }
 
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat stat;
+	int ret;
+
+	ret = fstat(fd, &stat);
+	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
+}
+
+static int
+af_unix_map_mem_regions(struct virtio_net *dev)
+{
+	uint32_t i;
+
+	for (i = 0; i < dev->mem->nregions; i++) {
+		struct rte_vhost_mem_region *reg = &dev->mem->regions[i];
+		uint64_t mmap_size = reg->mmap_size;
+		uint64_t mmap_offset = mmap_size - reg->size;
+		uint64_t alignment;
+		void *mmap_addr;
+
+		/* mmap() without flag of MAP_ANONYMOUS, should be called
+		 * with length argument aligned with hugepagesz at older
+		 * longterm version Linux, like 2.6.32 and 3.2.72, or
+		 * mmap() will fail with EINVAL.
+		 *
+		 * to avoid failure, make sure in caller to keep length
+		 * aligned.
+		 */
+		alignment = get_blk_size(reg->fd);
+		if (alignment == (uint64_t)-1) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"couldn't get hugepage size through fstat\n");
+			return -1;
+		}
+		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
+
+		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_POPULATE, reg->fd, 0);
+
+		if (mmap_addr == MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap region %u failed.\n", i);
+			return -1;
+		}
+
+		reg->mmap_addr = mmap_addr;
+		reg->mmap_size = mmap_size;
+		reg->host_user_addr = (uint64_t)(uintptr_t)reg->mmap_addr +
+				      mmap_offset;
+
+		if (dev->dequeue_zero_copy)
+			vhost_add_guest_pages(dev, reg, alignment);
+	}
+
+	return 0;
+}
+
+static void
+af_unix_unmap_mem_regions(struct virtio_net *dev)
+{
+	uint32_t i;
+	struct rte_vhost_mem_region *reg;
+
+	for (i = 0; i < dev->mem->nregions; i++) {
+		reg = &dev->mem->regions[i];
+		if (reg->host_user_addr) {
+			munmap(reg->mmap_addr, reg->mmap_size);
+			close(reg->fd);
+		}
+	}
+}
+
 const struct vhost_transport_ops af_unix_trans_ops = {
 	.socket_size = sizeof(struct af_unix_socket),
 	.device_size = sizeof(struct vhost_user_connection),
@@ -714,4 +790,6 @@ const struct vhost_transport_ops af_unix_trans_ops = {
 	.send_reply = af_unix_send_reply,
 	.send_slave_req = af_unix_send_slave_req,
 	.set_slave_req_fd = af_unix_set_slave_req_fd,
+	.map_mem_regions = af_unix_map_mem_regions,
+	.unmap_mem_regions = af_unix_unmap_mem_regions,
 };
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index ee1b0a1a2..a819684b4 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -52,32 +52,13 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
 	[VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
 };
 
-static uint64_t
-get_blk_size(int fd)
-{
-	struct stat stat;
-	int ret;
-
-	ret = fstat(fd, &stat);
-	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
-}
-
 static void
 free_mem_region(struct virtio_net *dev)
 {
-	uint32_t i;
-	struct rte_vhost_mem_region *reg;
-
 	if (!dev || !dev->mem)
 		return;
 
-	for (i = 0; i < dev->mem->nregions; i++) {
-		reg = &dev->mem->regions[i];
-		if (reg->host_user_addr) {
-			munmap(reg->mmap_addr, reg->mmap_size);
-			close(reg->fd);
-		}
-	}
+	dev->trans_ops->unmap_mem_regions(dev);
 }
 
 void
@@ -516,9 +497,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page->size = size;
 }
 
-static void
-add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
-		uint64_t page_size)
+void
+vhost_add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
+		      uint64_t page_size)
 {
 	uint64_t reg_size = reg->size;
 	uint64_t host_user_addr = reg->host_user_addr;
@@ -602,19 +583,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 {
 	struct VhostUserMemory memory = pmsg->payload.memory;
 	struct rte_vhost_mem_region *reg;
-	void *mmap_addr;
-	uint64_t mmap_size;
-	uint64_t mmap_offset;
-	uint64_t alignment;
 	uint32_t i;
-	int fd;
 
 	if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) {
 		RTE_LOG(INFO, VHOST_CONFIG,
			"(%d) memory regions not changed\n", dev->vid);
 
-		for (i = 0; i < memory.nregions; i++)
-			close(pmsg->fds[i]);
+		for (i = 0; i < memory.nregions; i++) {
+			if (pmsg->fds[i] >= 0) {
+				close(pmsg->fds[i]);
+			}
+		}
 
 		return 0;
 	}
@@ -649,50 +628,24 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 	}
 	dev->mem->nregions = memory.nregions;
 
+	/* Fill in dev->mem->regions[] */
 	for (i = 0; i < memory.nregions; i++) {
-		fd  = pmsg->fds[i];
 		reg = &dev->mem->regions[i];
 
 		reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
 		reg->guest_user_addr = memory.regions[i].userspace_addr;
 		reg->size            = memory.regions[i].memory_size;
-		reg->fd              = fd;
+		reg->mmap_size       = reg->size + memory.regions[i].mmap_offset;
+		reg->mmap_addr       = NULL;
+		reg->host_user_addr  = 0;
+		reg->fd              = pmsg->fds[i];
+	}
 
-		mmap_offset = memory.regions[i].mmap_offset;
-		mmap_size   = reg->size + mmap_offset;
+	if (dev->trans_ops->map_mem_regions(dev) < 0)
+		goto err;
 
-		/* mmap() without flag of MAP_ANONYMOUS, should be called
-		 * with length argument aligned with hugepagesz at older
-		 * longterm version Linux, like 2.6.32 and 3.2.72, or
-		 * mmap() will fail with EINVAL.
-		 *
-		 * to avoid failure, make sure in caller to keep length
-		 * aligned.
-		 */
-		alignment = get_blk_size(fd);
-		if (alignment == (uint64_t)-1) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"couldn't get hugepage size through fstat\n");
-			goto err_mmap;
-		}
-		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
-
-		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
-				 MAP_SHARED | MAP_POPULATE, fd, 0);
-
-		if (mmap_addr == MAP_FAILED) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"mmap region %u failed.\n", i);
-			goto err_mmap;
-		}
-
-		reg->mmap_addr = mmap_addr;
-		reg->mmap_size = mmap_size;
-		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
-				      mmap_offset;
-
-		if (dev->dequeue_zero_copy)
-			add_guest_pages(dev, reg, alignment);
+	for (i = 0; i < memory.nregions; i++) {
+		reg = &dev->mem->regions[i];
 
 		RTE_LOG(INFO, VHOST_CONFIG,
 			"guest memory region %u, size: 0x%" PRIx64 "\n"
@@ -701,23 +654,21 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 			"\t host virtual addr: 0x%" PRIx64 "\n"
 			"\t mmap addr : 0x%" PRIx64 "\n"
 			"\t mmap size : 0x%" PRIx64 "\n"
-			"\t mmap align: 0x%" PRIx64 "\n"
 			"\t mmap off : 0x%" PRIx64 "\n",
 			i, reg->size,
 			reg->guest_phys_addr,
 			reg->guest_user_addr,
 			reg->host_user_addr,
-			(uint64_t)(uintptr_t)mmap_addr,
-			mmap_size,
-			alignment,
-			mmap_offset);
+			(uint64_t)(uintptr_t)reg->mmap_addr,
+			reg->mmap_size,
+			memory.regions[i].mmap_offset);
 	}
 
 	dump_guest_pages(dev);
 
 	return 0;
 
-err_mmap:
+err:
 	free_mem_region(dev);
 	rte_free(dev->mem);
 	dev->mem = NULL;
-- 
2.14.3