DPDK patches and discussions
 help / color / mirror / Atom feed
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: Ferruh Yigit <ferruh.yigit@intel.com>,
	Gaetan Rivet <gaetan.rivet@6wind.com>,
	thomas@monjalon.net, hemant.agrawal@nxp.com,
	bruce.richardson@intel.com, konstantin.ananyev@intel.com,
	jerin.jacob@caviumnetworks.com, olivier.matz@6wind.com,
	stephen@networkplumber.org, nhorman@tuxdriver.com,
	david.marchand@6wind.com, gowrishankar.m@linux.vnet.ibm.com
Subject: [dpdk-dev] [RFC 3/3] bus/pci: use the new device memory API for BAR mapping
Date: Thu, 31 May 2018 11:57:50 +0100	[thread overview]
Message-ID: <ca9f07f8d5adcb6cf1a410360e9adc8cf68cae1b.1527764061.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1527764061.git.anatoly.burakov@intel.com>

Adjust PCI infrastructure to reserve device memory through the
new device memory API. Any hotplug event will reserve memory, and any
hot-unplug event will release that memory back to the system.

This allows for more reliable PCI mappings in secondary processes,
and will be crucial to support multiprocess hotplug.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/bus/pci/linux/pci_init.h |  1 -
 drivers/bus/pci/linux/pci_uio.c  | 11 +----------
 drivers/bus/pci/linux/pci_vfio.c | 27 ++++++++++++---------------
 lib/librte_pci/Makefile          |  1 +
 lib/librte_pci/rte_pci.c         | 20 +++++++++++++++++++-
 5 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index c2e603a37..bc9279c66 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -14,7 +14,6 @@
 /*
  * Helper function to map PCI resources right after hugepages in virtual memory
  */
-extern void *pci_map_addr;
 void *pci_find_max_end_va(void);
 
 /* parse one line of the "resource" sysfs file (note that the 'line'
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4bb0..dbf108b6f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -26,8 +26,6 @@
 #include "eal_filesystem.h"
 #include "pci_init.h"
 
-void *pci_map_addr = NULL;
-
 #define OFF_MAX              ((uint64_t)(off_t)-1)
 
 int
@@ -316,19 +314,12 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
 		goto error;
 	}
 
-	/* try mapping somewhere close to the end of hugepages */
-	if (pci_map_addr == NULL)
-		pci_map_addr = pci_find_max_end_va();
-
-	mapaddr = pci_map_resource(pci_map_addr, fd, 0,
+	mapaddr = pci_map_resource(NULL, fd, 0,
 			(size_t)dev->mem_resource[res_idx].len, 0);
 	close(fd);
 	if (mapaddr == MAP_FAILED)
 		goto error;
 
-	pci_map_addr = RTE_PTR_ADD(mapaddr,
-			(size_t)dev->mem_resource[res_idx].len);
-
 	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
 	maps[map_idx].size = dev->mem_resource[res_idx].len;
 	maps[map_idx].addr = mapaddr;
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index aeeaa9ed8..f390ea37a 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -324,7 +324,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
 
 static int
 pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
-		int bar_index, int additional_flags)
+		int bar_index)
 {
 	struct memreg {
 		unsigned long offset, size;
@@ -371,9 +371,14 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
 		memreg[0].size = bar->size;
 	}
 
-	/* reserve the address using an inaccessible mapping */
-	bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
-			MAP_ANONYMOUS | additional_flags, -1, 0);
+	if (bar->addr == NULL) {
+		bar_addr = rte_mem_dev_memory_alloc(bar->size, 0);
+		if (bar_addr == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n",
+				__func__);
+			return -1;
+		}
+	}
 	if (bar_addr != MAP_FAILED) {
 		void *map_addr = NULL;
 		if (memreg[0].size) {
@@ -469,7 +474,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 
 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
 		struct vfio_region_info reg = { .argsz = sizeof(reg) };
-		void *bar_addr;
 
 		reg.index = i;
 
@@ -494,19 +498,12 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
 			continue;
 
-		/* try mapping somewhere close to the end of hugepages */
-		if (pci_map_addr == NULL)
-			pci_map_addr = pci_find_max_end_va();
-
-		bar_addr = pci_map_addr;
-		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
-
-		maps[i].addr = bar_addr;
+		maps[i].addr = NULL;
 		maps[i].offset = reg.offset;
 		maps[i].size = reg.size;
 		maps[i].path = NULL; /* vfio doesn't have per-resource paths */
 
-		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
+		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i);
 		if (ret < 0) {
 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
 					pci_addr, i, strerror(errno));
@@ -574,7 +571,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 	maps = vfio_res->maps;
 
 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
-		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
+		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i);
 		if (ret < 0) {
 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
 					pci_addr, i, strerror(errno));
diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile
index 94a632670..f996fe33c 100644
--- a/lib/librte_pci/Makefile
+++ b/lib/librte_pci/Makefile
@@ -8,6 +8,7 @@ LIB = librte_pci.a
 
 CFLAGS := -I$(SRCDIR) $(CFLAGS)
 CFLAGS += $(WERROR_FLAGS) -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal
 
 EXPORT_MAP := rte_pci_version.map
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 530738dbd..c425a624e 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -151,6 +151,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
 {
 	void *mapaddr;
 
+	if (requested_addr == NULL) {
+		requested_addr = rte_mem_dev_memory_alloc(size, 0);
+		if (requested_addr == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n",
+				__func__);
+			return MAP_FAILED;
+		}
+	}
+	additional_flags |= MAP_FIXED;
+
 	/* Map the PCI memory resource of device */
 	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
 			MAP_SHARED | additional_flags, fd, offset);
@@ -170,15 +180,23 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
 void
 pci_unmap_resource(void *requested_addr, size_t size)
 {
+	void *mapped;
 	if (requested_addr == NULL)
 		return;
 
+	mapped = mmap(requested_addr, size, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
 	/* Unmap the PCI memory resource of device */
-	if (munmap(requested_addr, size)) {
+	if (mapped == MAP_FAILED) {
 		RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, %#zx): %s\n",
 			__func__, requested_addr, size,
 			strerror(errno));
 	} else
 		RTE_LOG(DEBUG, EAL, "  PCI memory unmapped at %p\n",
 				requested_addr);
+	if (rte_mem_dev_memory_free(requested_addr, size))
+		RTE_LOG(ERR, EAL, "%s(): cannot mark %p-%p as free\n",
+			__func__, requested_addr,
+			RTE_PTR_ADD(requested_addr, size));
 }
-- 
2.17.0

  parent reply	other threads:[~2018-05-31 10:58 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-31 10:57 [dpdk-dev] [RFC 0/3] Make device mapping more reliable Anatoly Burakov
2018-05-31 10:57 ` [dpdk-dev] [RFC 1/3] fbarray: allow zero-sized elements Anatoly Burakov
2018-05-31 10:57 ` [dpdk-dev] [RFC 2/3] mem: add device memory reserve/free API Anatoly Burakov
2018-05-31 10:57 ` Anatoly Burakov [this message]
2018-08-14 10:13 ` [dpdk-dev] [RFC 0/3] Make device mapping more reliable Burakov, Anatoly

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ca9f07f8d5adcb6cf1a410360e9adc8cf68cae1b.1527764061.git.anatoly.burakov@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=david.marchand@6wind.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=gaetan.rivet@6wind.com \
    --cc=gowrishankar.m@linux.vnet.ibm.com \
    --cc=hemant.agrawal@nxp.com \
    --cc=jerin.jacob@caviumnetworks.com \
    --cc=konstantin.ananyev@intel.com \
    --cc=nhorman@tuxdriver.com \
    --cc=olivier.matz@6wind.com \
    --cc=stephen@networkplumber.org \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).