From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f177.google.com (mail-pd0-f177.google.com [209.85.192.177]) by dpdk.org (Postfix) with ESMTP id 416A2806D for ; Tue, 16 Dec 2014 03:38:16 +0100 (CET) Received: by mail-pd0-f177.google.com with SMTP id ft15so12857418pdb.36 for ; Mon, 15 Dec 2014 18:38:15 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:message-id:date:from:user-agent:mime-version:to :cc:subject:references:in-reply-to:content-type :content-transfer-encoding; bh=KuodGTPfISYd/hulBhabweIBTNSo9kKfjRbdCi4qh1I=; b=URrRvyvnllESmPOXqpM/OgpsYXWy8B5yg8iGLL2EW1NcUER+ngJQd9sfQsavzW3XaR E+C2LGHiC3tGnJWDxyUi03x2RHeEezflbq1fWfrMaeBr1dkVptRl1GhmuZCU4AxdDhjK yaFGoGOJ7boKPT8ZUQlT2vnx1L6Q0Xqqo1Juga8pB4w4qdxGD1HrfRqdwJCqhbwrg6Sv 9eW8rUivAPa0OyxAM7EuwURCZyl+uvrOnyvq5LFfHqdcNLYLxmUEKjXjdOKrj8o+iJ4c hWqBTaLd2KV/keS8jly8J+s7d1RMDw5krOPYs3sSqsIrGKynM8SBubACkCeBIYMX0NGi R0gQ== X-Gm-Message-State: ALoCoQlNAaS9Jzuy8hpjWZeAwgOMv+txeylUJpfPDqi4hkGViCNu8uChF4wBWkzYR+ddZ3Y1bHcj X-Received: by 10.68.135.100 with SMTP id pr4mr56081564pbb.123.1418697495458; Mon, 15 Dec 2014 18:38:15 -0800 (PST) Received: from [10.16.129.101] (napt.igel.co.jp. 
[219.106.231.132]) by mx.google.com with ESMTPSA id ju4sm10479989pbc.81.2014.12.15.18.38.13 for (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Mon, 15 Dec 2014 18:38:14 -0800 (PST) Message-ID: <548F9B17.40807@igel.co.jp> Date: Tue, 16 Dec 2014 11:38:15 +0900 From: Tetsuya Mukawa User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:24.0) Gecko/20100101 Thunderbird/24.6.0 MIME-Version: 1.0 To: Huawei Xie , dev@dpdk.org References: <1418247477-13920-1-git-send-email-huawei.xie@intel.com> <1418247477-13920-11-git-send-email-huawei.xie@intel.com> In-Reply-To: <1418247477-13920-11-git-send-email-huawei.xie@intel.com> Content-Type: text/plain; charset=ISO-2022-JP Content-Transfer-Encoding: 7bit Cc: haifeng.lin@intel.com Subject: Re: [dpdk-dev] [PATCH RFC v2 10/12] lib/librte_vhost: vhost-user memory region map X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 16 Dec 2014 02:38:17 -0000 (2014/12/11 6:37), Huawei Xie wrote: > deals with vhost user memory map/unmap alignment > > Signed-off-by: Huawei Xie > --- > lib/librte_vhost/rte_virtio_net.h | 2 + > lib/librte_vhost/vhost-net.h | 2 - > lib/librte_vhost/vhost_user/vhost-net-user.h | 3 +- > lib/librte_vhost/vhost_user/virtio-net-user.c | 105 ++++++++++++++++++++++++-- > 4 files changed, 100 insertions(+), 12 deletions(-) > > diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h > index 00b1328..77db80b 100644 > --- a/lib/librte_vhost/rte_virtio_net.h > +++ b/lib/librte_vhost/rte_virtio_net.h > @@ -48,6 +48,8 @@ > #include > #include > > +#define VHOST_MEMORY_MAX_NREGIONS 8 > + > /* Used to indicate that the device is running on a data core */ > #define VIRTIO_DEV_RUNNING 1 > > diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h > index f9ec40b..ec2584f 100644 > --- a/lib/librte_vhost/vhost-net.h > +++ 
b/lib/librte_vhost/vhost-net.h > @@ -43,8 +43,6 @@ > > #include "rte_virtio_net.h" > > -#define VHOST_MEMORY_MAX_NREGIONS 8 > - > extern struct vhost_net_device_ops const *ops; > > /* Macros for printing using RTE_LOG */ > diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h > index c138844..f4c9d01 100644 > --- a/lib/librte_vhost/vhost_user/vhost-net-user.h > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h > @@ -37,6 +37,7 @@ > #include > #include > > +#include "rte_virtio_net.h" > #include "fd_man.h" > > struct vhost_server { > @@ -47,8 +48,6 @@ struct vhost_server { > > /* refer to hw/virtio/vhost-user.c */ > > -#define VHOST_MEMORY_MAX_NREGIONS 8 > - > typedef enum VhostUserRequest { > VHOST_USER_NONE = 0, > VHOST_USER_GET_FEATURES = 1, > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c > index ad59fcc..3aecb17 100644 > --- a/lib/librte_vhost/vhost_user/virtio-net-user.c > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c > @@ -36,7 +36,11 @@ > #include > #include > #include > +#include > +#include > +#include > > +#include > #include > > #include "virtio-net.h" > @@ -44,13 +48,56 @@ > #include "vhost-net-user.h" > #include "vhost-net.h" > > +struct orig_region_map { > + int fd; > + uint64_t mapped_address; > + uint64_t mapped_size; > + uint64_t blksz; > +}; > + > +#define orig_region(ptr, nregions) (struct orig_region_map *)RTE_PTR_ADD(ptr, sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * (nregions)) > + > +static uint64_t > +get_blk_size(int fd) > +{ > + struct stat stat; > + fstat(fd, &stat); > + return (uint64_t)stat.st_blksize; > +} I've also confirmed that we can get the hugepage size of the fd using st_blksize. Even if someone runs QEMU on 2MB hugepages while the DPDK backend uses 1GB hugepages, we will still be able to mmap and munmap the QEMU backend memory correctly.
So I guess using st_blksize is a smart workaround to avoid hitting the munmap issue. > + > int > user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) > { > - unsigned int idx; > struct VhostUserMemory memory = pmsg->payload.memory; > struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS]; > - uint64_t mapped_address, base_address = 0; > + uint64_t mapped_address, mapped_size, base_address = 0; > + struct virtio_net *dev; > + unsigned int idx = 0; > + struct orig_region_map tmp[VHOST_MEMORY_MAX_NREGIONS] = > + { [0 ... VHOST_MEMORY_MAX_NREGIONS - 1] = { 0 } }; > + struct orig_region_map *region; > + uint64_t alignment; > + int ret; > + > + /* unmap old memory regions one by one*/ > + dev = get_device(ctx); > + if (dev->mem) { > + region = orig_region(dev->mem, dev->mem->nregions); > + for (idx = 0; idx < dev->mem->nregions; idx++) { > + if (region[idx].mapped_address) { > + alignment = region[idx].blksz; > + printf("Freeing %p\n", > + (void *)(uintptr_t)region[idx].mapped_address); > + ret = munmap((void *)RTE_ALIGN_FLOOR(region[idx].mapped_address, alignment), > + RTE_ALIGN_CEIL(region[idx].mapped_size, alignment)); > + printf("munmap ret= %d\n", ret); > + printf("close file %d\n", region[idx].fd); > + close(region[idx].fd); > + } > + } > + free(dev->mem); > + dev->mem = NULL; > + } > > for (idx = 0; idx < memory.nregions; idx++) { > if (memory.regions[idx].guest_phys_addr == 0) > @@ -73,22 +120,30 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) > memory.regions[idx].userspace_addr; > > /* This is ugly */ > + mapped_size = regions[idx].memory_size + > + memory.regions[idx].mmap_offset; > mapped_address = (uint64_t)(uintptr_t)mmap(NULL, > - regions[idx].memory_size + > - memory.regions[idx].mmap_offset, > + mapped_size, > PROT_READ | PROT_WRITE, MAP_SHARED, > pmsg->fds[idx], > 0); > + > RTE_LOG(INFO, VHOST_CONFIG, > - "mapped region %d to %p\n", > - idx, (void *)mapped_address); > + "mapped region %d fd:%d to %p
sz:0x%"PRIx64" off:0x%"PRIx64"\n", > + idx, pmsg->fds[idx], (void *)mapped_address, > + mapped_size, memory.regions[idx].mmap_offset); > > if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) { > RTE_LOG(ERR, VHOST_CONFIG, > "mmap qemu guest failed.\n"); > - return -1; > + goto err; > } > > + tmp[idx].mapped_address = mapped_address; > + tmp[idx].mapped_size = mapped_size; > + tmp[idx].blksz = get_blk_size(pmsg->fds[idx]); > + tmp[idx].fd = pmsg->fds[idx]; > + > mapped_address += memory.regions[idx].mmap_offset; > > regions[idx].address_offset = mapped_address - > @@ -100,10 +155,44 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) > (void *)(uintptr_t)regions[idx].userspace_address, > regions[idx].memory_size); > } > + > ops->set_mem_table(ctx, regions, memory.nregions); > + > + if (dev->mem) { > + void *tmp_mem; > + tmp_mem = realloc(dev->mem, > + sizeof(struct virtio_memory) + > + sizeof(struct virtio_memory_regions) * memory.nregions + > + sizeof(struct orig_region_map) * memory.nregions); > + if (tmp_mem == NULL) { > + goto err_realloc; > + } > + dev->mem = tmp_mem; > + region = orig_region(dev->mem, memory.nregions); > + for (idx = 0; idx < memory.nregions; idx++) { > + region[idx].mapped_address = tmp[idx].mapped_address; > + region[idx].mapped_size = tmp[idx].mapped_size; > + region[idx].blksz = tmp[idx].blksz; > + region[idx].fd = tmp[idx].fd; > + } > + } else > + goto err_set_mem_table; > + > return 0; > -} > > +err_realloc: > + free(dev->mem); > +err_set_mem_table: > +err: > + while (idx--) { > + alignment = tmp[idx].blksz; > + munmap((void *)RTE_ALIGN_FLOOR(tmp[idx].mapped_address, alignment), > + RTE_ALIGN_CEIL(tmp[idx].mapped_size, alignment)); > + close(tmp[idx].fd); > + } > + dev->mem = NULL; > + return -1; > +} > > static int > virtio_is_ready(struct virtio_net *dev)