From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id BDEE67EB3 for ; Thu, 13 Nov 2014 14:37:25 +0100 (CET) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga102.fm.intel.com with ESMTP; 13 Nov 2014 05:47:21 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.97,862,1389772800"; d="scan'208";a="415992514" Received: from bricha3-mobl3.ger.corp.intel.com ([10.237.220.93]) by FMSMGA003.fm.intel.com with SMTP; 13 Nov 2014 05:37:56 -0800 Received: by (sSMTP sendmail emulation); Thu, 13 Nov 2014 13:46:55 +0025 Date: Thu, 13 Nov 2014 13:46:55 +0000 From: Bruce Richardson To: Anatoly Burakov Message-ID: <20141113134655.GA8224@bricha3-MOBL3> References: <1415619272-8281-1-git-send-email-anatoly.burakov@intel.com> <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com> Organization: Intel Shannon Ltd. User-Agent: Mutt/1.5.23 (2014-03-12) Cc: dev@dpdk.org Subject: Re: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 13 Nov 2014 13:37:26 -0000 On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote: > Multi-process DPDK application must mmap hugepages and PCI resources > into the same virtual address space. By default the virtual addresses > are chosen by the primary process automatically when calling the mmap. 
> But sometimes the chosen virtual addresses aren't usable in a secondary > process - for example, when the secondary process is linked with more libraries > than the primary process, and one of those libraries occupies the same address space > that the primary process has requested for PCI mappings. > > This patch makes EAL try to map PCI BARs right after the hugepages > (instead of at a location chosen by mmap) in virtual memory, so that PCI BARs > have less chance of ending up in random places in virtual memory. > > Signed-off-by: Liang Xu > Signed-off-by: Anatoly Burakov Acked-by: Bruce Richardson > --- > lib/librte_eal/linuxapp/eal/eal_pci.c | 30 ++++++++++++++++------ > lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 13 ++++++++-- > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++++++++++--- > lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 6 +++++ > 4 files changed, 55 insertions(+), 13 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c > index 5fe3961..79fbbb8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c > @@ -97,6 +97,25 @@ error: > return -1; > } > > +void * > +pci_find_max_end_va(void) > +{ > + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); > + const struct rte_memseg *last = seg; > + unsigned i = 0; > + > + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { > + if (seg->addr == NULL) > + break; > + > + if (seg->addr > last->addr) > + last = seg; > + > + } > + return RTE_PTR_ADD(last->addr, last->len); > +} > + > + > /* map a particular resource from a file */ > void * > pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) > @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) > /* Map the PCI memory resource of device */ > mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, > MAP_SHARED, fd, offset); > - if (mapaddr == MAP_FAILED || > - (requested_addr != NULL && mapaddr != 
requested_addr)) { > + if (mapaddr == MAP_FAILED) { > RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", > __func__, fd, requested_addr, > (unsigned long)size, (unsigned long)offset, > strerror(errno), mapaddr); > - goto fail; > + } else { > + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); > } > > - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); > - > return mapaddr; > - > -fail: > - return NULL; > } > > /* parse the "resource" sysfs file */ > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > index 7e62266..e53f06b 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > @@ -35,6 +35,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -48,6 +49,8 @@ > > static int pci_parse_sysfs_value(const char *filename, uint64_t *val); > > +void *pci_map_addr = NULL; > + > > #define OFF_MAX ((uint64_t)(off_t)-1) > static int > @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev) > if (maps[j].addr != NULL) > fail = 1; > else { > - mapaddr = pci_map_resource(NULL, fd, (off_t)offset, > + /* try mapping somewhere close to the end of hugepages */ > + if (pci_map_addr == NULL) > + pci_map_addr = pci_find_max_end_va(); > + > + mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset, > (size_t)maps[j].size); > - if (mapaddr == NULL) > + if (mapaddr == MAP_FAILED) > fail = 1; > + > + pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size); > } > > if (fail) { > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index c776ddc..c1246e8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -37,6 +37,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > if (i == msix_bar) > continue; > > 
- bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, > - reg.size); > + if (internal_config.process_type == RTE_PROC_PRIMARY) { > + /* try mapping somewhere close to the end of hugepages */ > + if (pci_map_addr == NULL) > + pci_map_addr = pci_find_max_end_va(); > + > + bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset, > + reg.size); > + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); > + } else { > + bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, > + reg.size); > + } > > - if (bar_addr == NULL) { > + if (bar_addr == MAP_FAILED || > + (internal_config.process_type == RTE_PROC_SECONDARY && > + bar_addr != maps[i].addr)) { > RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, > strerror(errno)); > close(vfio_dev_fd); > diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > index d758bee..1070eb8 100644 > --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > @@ -59,6 +59,12 @@ struct mapped_pci_resource { > TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); > extern struct mapped_pci_res_list *pci_res_list; > > +/* > + * Helper function to map PCI resources right after hugepages in virtual memory > + */ > +extern void *pci_map_addr; > +void *pci_find_max_end_va(void); > + > void *pci_map_resource(void *requested_addr, int fd, off_t offset, > size_t size); > > -- > 1.8.1.4 >