From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 17A39594F for ; Thu, 13 Nov 2014 12:24:45 +0100 (CET) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga101.jf.intel.com with ESMTP; 13 Nov 2014 03:34:25 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.07,376,1413270000"; d="scan'208";a="607185879" Received: from irsmsx104.ger.corp.intel.com ([163.33.3.159]) by orsmga001.jf.intel.com with ESMTP; 13 Nov 2014 03:34:24 -0800 Received: from irsmsx109.ger.corp.intel.com ([169.254.13.101]) by IRSMSX104.ger.corp.intel.com ([169.254.5.116]) with mapi id 14.03.0195.001; Thu, 13 Nov 2014 11:34:23 +0000 From: "Burakov, Anatoly" To: "dev@dpdk.org" Thread-Topic: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages Thread-Index: AQHP/Ze/g7jr2A7Af06zlboPmaVZspxeb8uA Date: Thu, 13 Nov 2014 11:34:22 +0000 Message-ID: References: <1415619272-8281-1-git-send-email-anatoly.burakov@intel.com> <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com> In-Reply-To: <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [163.33.239.180] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 13 Nov 2014 11:24:46 -0000 Hi Thomas and all, Are there any objections to this patch? If there are no objections to it, could someone perhaps ack it? 
Thanks, Anatoly -----Original Message----- From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Anatoly Burakov Sent: Tuesday, November 11, 2014 10:09 AM To: dev@dpdk.org Subject: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages Multi-process DPDK application must mmap hugepages and PCI resources into the same virtual address space. By default the virtual addresses are chosen by the primary process automatically when calling the mmap. But sometimes the chosen virtual addresses aren't usable in secondary process - for example, secondary process is linked with more libraries than primary process, and the library occupies the same address space that the primary process has requested for PCI mappings. This patch makes EAL try and map PCI BARs right after the hugepages (instead of location chosen by mmap) in virtual memory, so that PCI BARs have less chance of ending up in random places in virtual memory. Signed-off-by: Liang Xu Signed-off-by: Anatoly Burakov --- lib/librte_eal/linuxapp/eal/eal_pci.c | 30 ++++++++++++++++--= ---- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 13 ++++++++-- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++++++++++--- lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 6 +++++ 4 files changed, 55 insertions(+), 13 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxap= p/eal/eal_pci.c index 5fe3961..79fbbb8 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -97,6 +97,25 @@ error: return -1; } =20 +void * +pci_find_max_end_va(void) +{ + const struct rte_memseg *seg =3D rte_eal_get_physmem_layout(); + const struct rte_memseg *last =3D seg; + unsigned i =3D 0; + + for (i =3D 0; i < RTE_MAX_MEMSEG; i++, seg++) { + if (seg->addr =3D=3D NULL) + break; + + if (seg->addr > last->addr) + last =3D seg; + + } + return RTE_PTR_ADD(last->addr, last->len); } + + /* map a particular resource from a file */ void * pci_map_resource(void= 
*requested_addr, int fd, off_t offset, size_t size) @@ -106,21 +125,16 @@ = pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) /* Map the PCI memory resource of device */ mapaddr =3D mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); - if (mapaddr =3D=3D MAP_FAILED || - (requested_addr !=3D NULL && mapaddr !=3D requested_addr)) { + if (mapaddr =3D=3D MAP_FAILED) { RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", __func__, fd, requested_addr, (unsigned long)size, (unsigned long)offset, strerror(errno), mapaddr); - goto fail; + } else { + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); } =20 - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); - return mapaddr; - -fail: - return NULL; } =20 /* parse the "resource" sysfs file */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/lin= uxapp/eal/eal_pci_uio.c index 7e62266..e53f06b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -35,6 +35,7 @@ #include #include #include +#include =20 #include #include @@ -48,6 +49,8 @@ =20 static int pci_parse_sysfs_value(const char *filename, uint64_t *val); =20 +void *pci_map_addr =3D NULL; + =20 #define OFF_MAX ((uint64_t)(off_t)-1) static int @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev) if (maps[j].addr !=3D NULL) fail =3D 1; else { - mapaddr =3D pci_map_resource(NULL, fd, (off_t)offset, + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr =3D=3D NULL) + pci_map_addr =3D pci_find_max_end_va(); + + mapaddr =3D pci_map_resource(pci_map_addr, fd, (off_t)offset, (size_t)maps[j].size); - if (mapaddr =3D=3D NULL) + if (mapaddr =3D=3D MAP_FAILED) fail =3D 1; + + pci_map_addr =3D RTE_PTR_ADD(mapaddr, (size_t) maps[j].size); } =20 if (fail) { diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/li= nuxapp/eal/eal_pci_vfio.c index c776ddc..c1246e8 100644 
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -37,6 +37,7 @@ #include #include #include +#include =20 #include #include @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev) if (i =3D=3D msix_bar) continue; =20 - bar_addr =3D pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, - reg.size); + if (internal_config.process_type =3D=3D RTE_PROC_PRIMARY) { + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr =3D=3D NULL) + pci_map_addr =3D pci_find_max_end_va(); + + bar_addr =3D pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset, + reg.size); + pci_map_addr =3D RTE_PTR_ADD(bar_addr, (size_t) reg.size); + } else { + bar_addr =3D pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, + reg.size); + } =20 - if (bar_addr =3D=3D NULL) { + if (bar_addr =3D=3D MAP_FAILED || + (internal_config.process_type =3D=3D RTE_PROC_SECONDARY && + bar_addr !=3D maps[i].addr)) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); close(vfio_dev_fd); diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librt= e_eal/linuxapp/eal/include/eal_pci_init.h index d758bee..1070eb8 100644 --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h @@ -59,6 +59,12 @@ struct mapped_pci_resource { TAILQ_HEAD(mapped_pci_res_= list, mapped_pci_resource); extern struct mapped_pci_res_list *pci_res_lis= t; =20 +/* + * Helper function to map PCI resources right after hugepages in=20 +virtual memory */ extern void *pci_map_addr; void=20 +*pci_find_max_end_va(void); + void *pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size); =20 -- 1.8.1.4