From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 093144C74 for ; Thu, 31 May 2018 12:58:06 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 31 May 2018 03:58:03 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.49,463,1520924400"; d="scan'208";a="63167116" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga002.jf.intel.com with ESMTP; 31 May 2018 03:58:00 -0700 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w4VAw0gR004135; Thu, 31 May 2018 11:58:00 +0100 Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w4VAw0Jc002516; Thu, 31 May 2018 11:58:00 +0100 Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w4VAw0Rq002512; Thu, 31 May 2018 11:58:00 +0100 From: Anatoly Burakov To: dev@dpdk.org Cc: Ferruh Yigit , Gaetan Rivet , thomas@monjalon.net, hemant.agrawal@nxp.com, bruce.richardson@intel.com, konstantin.ananyev@intel.com, jerin.jacob@caviumnetworks.com, olivier.matz@6wind.com, stephen@networkplumber.org, nhorman@tuxdriver.com, david.marchand@6wind.com, gowrishankar.m@linux.vnet.ibm.com Date: Thu, 31 May 2018 11:57:50 +0100 Message-Id: X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC 3/3] bus/pci: use the new device memory API for BAR mapping X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 31 May 2018 10:58:07 -0000 Adjust PCI infrastructure to reserve device memory through the new device memory API. Any hotplug event will reserve memory, any hot-unplug event will release memory back to the system. This allows for more reliable PCI mappings in secondary processes, and will be crucial to support multiprocess hotplug. Signed-off-by: Anatoly Burakov --- drivers/bus/pci/linux/pci_init.h | 1 - drivers/bus/pci/linux/pci_uio.c | 11 +---------- drivers/bus/pci/linux/pci_vfio.c | 27 ++++++++++++--------------- lib/librte_pci/Makefile | 1 + lib/librte_pci/rte_pci.c | 20 +++++++++++++++++++- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index c2e603a37..bc9279c66 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -14,7 +14,6 @@ /* * Helper function to map PCI resources right after hugepages in virtual memory */ -extern void *pci_map_addr; void *pci_find_max_end_va(void); /* parse one line of the "resource" sysfs file (note that the 'line' diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c index d423e4bb0..dbf108b6f 100644 --- a/drivers/bus/pci/linux/pci_uio.c +++ b/drivers/bus/pci/linux/pci_uio.c @@ -26,8 +26,6 @@ #include "eal_filesystem.h" #include "pci_init.h" -void *pci_map_addr = NULL; - #define OFF_MAX ((uint64_t)(off_t)-1) int @@ -316,19 +314,12 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, goto error; } - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - mapaddr = pci_map_resource(pci_map_addr, fd, 0, + mapaddr = pci_map_resource(NULL, fd, 0, (size_t)dev->mem_resource[res_idx].len, 0); close(fd); if (mapaddr == MAP_FAILED) goto error; - pci_map_addr = RTE_PTR_ADD(mapaddr, - (size_t)dev->mem_resource[res_idx].len); - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; maps[map_idx].size = dev->mem_resource[res_idx].len; maps[map_idx].addr = mapaddr; diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index aeeaa9ed8..f390ea37a 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -324,7 +324,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) static int pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, - int bar_index, int additional_flags) + int bar_index) { struct memreg { unsigned long offset, size; @@ -371,9 +371,14 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, memreg[0].size = bar->size; } - /* reserve the address using an inaccessible mapping */ - bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE | - MAP_ANONYMOUS | additional_flags, -1, 0); + if (bar->addr == NULL) { + bar_addr = rte_mem_dev_memory_alloc(bar->size, 0); + if (bar_addr == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n", + __func__); + return -1; + } + } if (bar_addr != MAP_FAILED) { void *map_addr = NULL; if (memreg[0].size) { @@ -469,7 +474,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) for (i = 0; i < (int) vfio_res->nb_maps; i++) { struct vfio_region_info reg = { .argsz = sizeof(reg) }; - void *bar_addr; reg.index = i; @@ -494,19 +498,12 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) continue; - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); - - maps[i].addr = bar_addr; + maps[i].addr = NULL; maps[i].offset = reg.offset; maps[i].size = reg.size; maps[i].path = NULL; /* vfio doesn't have per-resource paths */ - ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i); if (ret < 0) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); @@ -574,7 +571,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) maps = vfio_res->maps; for (i = 0; i < (int) vfio_res->nb_maps; i++) { - ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED); + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i); if (ret < 0) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile index 94a632670..f996fe33c 100644 --- a/lib/librte_pci/Makefile +++ b/lib/librte_pci/Makefile @@ -8,6 +8,7 @@ LIB = librte_pci.a CFLAGS := -I$(SRCDIR) $(CFLAGS) CFLAGS += $(WERROR_FLAGS) -O3 +CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal EXPORT_MAP := rte_pci_version.map diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c index 530738dbd..c425a624e 100644 --- a/lib/librte_pci/rte_pci.c +++ b/lib/librte_pci/rte_pci.c @@ -151,6 +151,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, { void *mapaddr; + if (requested_addr == NULL) { + requested_addr = rte_mem_dev_memory_alloc(size, 0); + if (requested_addr == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n", + __func__); + return MAP_FAILED; + } + } + additional_flags |= MAP_FIXED; + /* Map the PCI memory resource of device */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED | additional_flags, fd, offset); @@ -170,15 +180,23 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, void pci_unmap_resource(void *requested_addr, size_t size) { + void *mapped; if (requested_addr == NULL) return; + mapped = mmap(requested_addr, size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + /* Unmap the PCI memory resource of device */ - if (munmap(requested_addr, size)) { + if (mapped == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, %#zx): %s\n", __func__, requested_addr, size, strerror(errno)); } else RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", requested_addr); + if (rte_mem_dev_memory_free(requested_addr, size)) + RTE_LOG(ERR, EAL, "%s(): cannot mark %p-%p as free\n", + __func__, requested_addr, + RTE_PTR_ADD(requested_addr, size)); } -- 2.17.0