DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] eal: Support running as unprivileged user
@ 2017-01-03 22:30 Ben Walker
  2017-01-03 22:56 ` [dpdk-dev] [PATCH v2] " Ben Walker
  0 siblings, 1 reply; 9+ messages in thread
From: Ben Walker @ 2017-01-03 22:30 UTC (permalink / raw)
  To: dev; +Cc: Ben Walker

For Linux kernel 4.0 and newer, the ability to obtain
physical page frame numbers for unprivileged users from
/proc/self/pagemap was removed. When an IOMMU is present,
simply choose our own DMA addresses instead.

Signed-off-by: Ben Walker <benjamin.walker@intel.com>
---
 lib/librte_eal/common/eal_private.h      | 12 ++++++
 lib/librte_eal/linuxapp/eal/eal_memory.c | 71 +++++++++++++++++++++++---------
 lib/librte_eal/linuxapp/eal/eal_pci.c    |  6 ++-
 3 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..8b2d323 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
 #ifndef _EAL_PRIVATE_H_
 #define _EAL_PRIVATE_H_
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <rte_pci.h>
 
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
  */
 int rte_eal_hugepage_attach(void);
 
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Returns false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb2..33c66c1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
 #define _FILE_OFFSET_BITS 64
 #include <errno.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
 
 static uint64_t baseaddr_offset;
 
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
 static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void) /* probe whether virt-to-phys translation works and record the result */
 {
-	int fd = open("/proc/self/pagemap", O_RDONLY);
+	uint64_t tmp; /* stack probe: only its address is translated, its value is never read */
+	phys_addr_t physaddr;
 
-	if (fd < 0) {
+	physaddr = rte_mem_virt2phy(&tmp); /* attempt a real lookup instead of just opening pagemap */
+	if (physaddr == RTE_BAD_PHYS_ADDR) {
 		RTE_LOG(ERR, EAL,
-			"Cannot open /proc/self/pagemap: %s. "
-			"virt2phys address translation will not work\n",
+			"Cannot obtain physical addresses: %s. "
+			"Only vfio will function.\n",
 			strerror(errno)); /* NOTE(review): errno may be stale when virt2phy fails on a zero PFN - confirm */
-		return;
+		phys_addrs_available = false; /* makes later rte_mem_virt2phy() calls return early */
 	}
-
-	/* Is readable */
-	close(fd);
-	proc_pagemap_readable = 1;
 }
 
 /* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
 	}
 
 	/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
-	if (!proc_pagemap_readable)
+	if (!phys_addrs_available)
 		return RTE_BAD_PHYS_ADDR;
 
 	/* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
 	 * the pfn (page frame number) are bits 0-54 (see
 	 * pagemap.txt in linux Documentation)
 	 */
+	if ((page & 0x7fffffffffffffULL) == 0)
+		return RTE_BAD_PHYS_ADDR;
+
 	physaddr = ((page & 0x7fffffffffffffULL) * page_size)
 		+ ((unsigned long)virtaddr % page_size);
 
@@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 }
 
 /*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned int i;
+	static phys_addr_t addr; /* persists across calls so ranges for different page sizes never overlap */
+
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		hugepg_tbl[i].physaddr = addr; /* made-up DMA address, not a real physical one */
+		addr += hugepg_tbl[i].size;
+	}
+	return 0; /* cannot fail; int return mirrors find_physaddrs() for the caller's < 0 check */
+}
+
+/*
  * Check whether address-space layout randomization is enabled in
  * the kernel. This is important for multi-process as it can prevent
  * two processes mapping data to the same virtual address
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
 	int nr_hugefiles, nr_hugepages = 0;
 	void *addr;
 
-	test_proc_pagemap_readable();
+	test_phys_addrs_available();
 
 	memset(used_hp, 0, sizeof(used_hp));
 
@@ -1043,11 +1061,20 @@ rte_eal_hugepage_init(void)
 				continue;
 		}
 
-		/* find physical addresses and sockets for each hugepage */
-		if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
-			RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
-					(unsigned)(hpi->hugepage_sz / 0x100000));
-			goto fail;
+		if (phys_addrs_available) {
+			/* find physical addresses for each hugepage */
+			if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
+				RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
+						(unsigned)(hpi->hugepage_sz / 0x100000));
+				goto fail;
+			}
+		} else {
+			/* set physical addresses for each hugepage */
+			if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+				RTE_LOG(DEBUG, EAL, "Failed to set phys addr for %u MB pages\n",
+						(unsigned)(hpi->hugepage_sz / 0x100000));
+				goto fail;
+			}
 		}
 
 		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1316,7 @@ rte_eal_hugepage_attach(void)
 				"into secondary processes\n");
 	}
 
-	test_proc_pagemap_readable();
+	test_phys_addrs_available();
 
 	if (internal_config.xen_dom0_support) {
 #ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1453,9 @@ rte_eal_hugepage_attach(void)
 		close(fd_hugepage);
 	return -1;
 }
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+	return phys_addrs_available; /* starts true; cleared by test_phys_addrs_available() when translation fails */
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 4350134..cfbfedf 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
 		break;
 	case RTE_KDRV_IGB_UIO:
 	case RTE_KDRV_UIO_GENERIC:
-		/* map resources for devices that use uio */
-		ret = pci_uio_map_resource(dev);
+		if (rte_eal_using_phys_addrs()) {
+			/* map resources for devices that use uio */
+			ret = pci_uio_map_resource(dev);
+		}
 		break;
 	default:
 		RTE_LOG(DEBUG, EAL,
-- 
2.9.3

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-03-09 16:11 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-03 22:30 [dpdk-dev] [PATCH] eal: Support running as unprivileged user Ben Walker
2017-01-03 22:56 ` [dpdk-dev] [PATCH v2] " Ben Walker
2017-01-18 17:05   ` Sergio Gonzalez Monroy
2017-01-31 17:40   ` [dpdk-dev] [PATCH v3] " Ben Walker
2017-01-31 17:44     ` [dpdk-dev] [PATCH v4] " Ben Walker
2017-02-17 14:59       ` Sergio Gonzalez Monroy
2017-03-09 16:10         ` Thomas Monjalon
2017-02-17 19:28       ` Stephen Hemminger
2017-03-09 16:11         ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).