DPDK patches and discussions
 help / color / mirror / Atom feed
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages
Date: Tue, 11 Nov 2014 10:09:25 +0000
Message-ID: <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com> (raw)
In-Reply-To: <1415619272-8281-1-git-send-email-anatoly.burakov@intel.com>

A multi-process DPDK application must mmap hugepages and PCI resources
at the same virtual addresses in every process. By default, the virtual
addresses are chosen automatically when the primary process calls mmap.
However, the chosen virtual addresses are sometimes not usable in a
secondary process - for example, when the secondary process is linked
with more libraries than the primary process, and one of those libraries
occupies the same address range that the primary process has requested
for PCI mappings.

This patch makes EAL try to map PCI BARs right after the hugepages in
virtual memory (instead of at a location chosen by mmap), so that PCI
BARs have less chance of ending up in random places in virtual memory.

Signed-off-by: Liang Xu <liang.xu@cinfotech.cn>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/linuxapp/eal/eal_pci.c              | 30 ++++++++++++++++------
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 13 ++++++++--
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 19 +++++++++++---
 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +++++
 4 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 5fe3961..79fbbb8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -97,6 +97,25 @@ error:
 	return -1;
 }
 
+void *
+pci_find_max_end_va(void)
+{
+	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+	const struct rte_memseg *last = seg;
+	unsigned i = 0;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
+		if (seg->addr == NULL)
+			break;
+
+		if (seg->addr > last->addr)
+			last = seg;
+
+	}
+	return RTE_PTR_ADD(last->addr, last->len);
+}
+
+
 /* map a particular resource from a file */
 void *
 pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
@@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
 	/* Map the PCI memory resource of device */
 	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
 			MAP_SHARED, fd, offset);
-	if (mapaddr == MAP_FAILED ||
-			(requested_addr != NULL && mapaddr != requested_addr)) {
+	if (mapaddr == MAP_FAILED) {
 		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
 			__func__, fd, requested_addr,
 			(unsigned long)size, (unsigned long)offset,
 			strerror(errno), mapaddr);
-		goto fail;
+	} else {
+		RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
 	}
 
-	RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
-
 	return mapaddr;
-
-fail:
-	return NULL;
 }
 
 /* parse the "resource" sysfs file */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index 7e62266..e53f06b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -35,6 +35,7 @@
 #include <fcntl.h>
 #include <dirent.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 
 #include <rte_log.h>
 #include <rte_pci.h>
@@ -48,6 +49,8 @@
 
 static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
 
+void *pci_map_addr = NULL;
+
 
 #define OFF_MAX              ((uint64_t)(off_t)-1)
 static int
@@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 			if (maps[j].addr != NULL)
 				fail = 1;
 			else {
-				mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
+				/* try mapping somewhere close to the end of hugepages */
+				if (pci_map_addr == NULL)
+					pci_map_addr = pci_find_max_end_va();
+
+				mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
 						(size_t)maps[j].size);
-				if (mapaddr == NULL)
+				if (mapaddr == MAP_FAILED)
 					fail = 1;
+
+				pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size);
 			}
 
 			if (fail) {
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index c776ddc..c1246e8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -37,6 +37,7 @@
 #include <sys/eventfd.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 
 #include <rte_log.h>
 #include <rte_pci.h>
@@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 		if (i == msix_bar)
 			continue;
 
-		bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
-				reg.size);
+		if (internal_config.process_type == RTE_PROC_PRIMARY) {
+			/* try mapping somewhere close to the end of hugepages */
+			if (pci_map_addr == NULL)
+				pci_map_addr = pci_find_max_end_va();
+
+			bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
+					reg.size);
+			pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+		} else {
+			bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
+					reg.size);
+		}
 
-		if (bar_addr == NULL) {
+		if (bar_addr == MAP_FAILED ||
+				(internal_config.process_type == RTE_PROC_SECONDARY &&
+						bar_addr != maps[i].addr)) {
 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", pci_addr, i,
 					strerror(errno));
 			close(vfio_dev_fd);
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
index d758bee..1070eb8 100644
--- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
@@ -59,6 +59,12 @@ struct mapped_pci_resource {
 TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
 extern struct mapped_pci_res_list *pci_res_list;
 
+/*
+ * Helper function to map PCI resources right after hugepages in virtual memory
+ */
+extern void *pci_map_addr;
+void *pci_find_max_end_va(void);
+
 void *pci_map_resource(void *requested_addr, int fd, off_t offset,
 		size_t size);
 
-- 
1.8.1.4

  parent reply	other threads:[~2014-11-11  9:59 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-05 13:25 [dpdk-dev] [PATCH] eal: map uio resources after hugepages when the base_virtaddr is configured lxu
2014-11-05 15:10 ` Burakov, Anatoly
2014-11-05 15:49 ` [dpdk-dev] 答复: " XU Liang
2014-11-05 15:59   ` Burakov, Anatoly
2014-11-05 16:10   ` [dpdk-dev] 答复:答复: " XU Liang
2014-11-26  1:46     ` Qiu, Michael
2014-11-26  9:58       ` Burakov, Anatoly
2014-11-06 14:11 ` [dpdk-dev] [PATCH v2] " lxu
2014-11-06 14:27   ` Burakov, Anatoly
2014-11-06 14:48   ` [dpdk-dev] 答复:[PATCH " 徐亮
2014-11-06 14:47 ` [dpdk-dev] [PATCH v3] " lxu
2014-11-06 15:06   ` De Lara Guarch, Pablo
2014-11-06 15:07 ` [dpdk-dev] [PATCH v4] " lxu
2014-11-06 15:12   ` Thomas Monjalon
2014-11-06 15:11 ` lxu
2014-11-06 15:32 ` [dpdk-dev] [PATCH v5] " lxu
2014-11-06 15:41   ` Burakov, Anatoly
2014-11-06 15:58     ` Thomas Monjalon
2014-11-06 16:10       ` Burakov, Anatoly
2014-11-06 17:30         ` Bruce Richardson
2014-11-07  8:01 ` [dpdk-dev] [PATCH v6] " lxu
2014-11-07  9:42   ` Bruce Richardson
2014-11-07  9:47   ` Burakov, Anatoly
2014-11-07  9:57   ` XU Liang
2014-11-07 14:37     ` XU Liang
2014-11-10 11:34   ` [dpdk-dev] [PATCH v7] eal: map PCI memory resources after hugepages Anatoly Burakov
2014-11-10 13:33     ` Burakov, Anatoly
2014-11-11  3:53     ` XU Liang
2014-11-11 10:09     ` Anatoly Burakov [this message]
2014-11-13 11:34       ` [dpdk-dev] [PATCH v8] " Burakov, Anatoly
2014-11-13 12:58         ` Bruce Richardson
2014-11-13 13:44           ` Burakov, Anatoly
2014-11-13 13:46       ` Bruce Richardson
2014-11-25 17:17         ` Thomas Monjalon
2014-11-07 14:57 ` [dpdk-dev] [PATCH v7] eal: map uio " lxu
2014-11-07 15:14   ` Burakov, Anatoly
2014-11-07 15:15   ` Thomas Monjalon
2014-11-07 15:19   ` XU Liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1415700565-19157-1-git-send-email-anatoly.burakov@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git