* [dpdk-dev] [PATCH] eal: Support running as unprivileged user
@ 2017-01-03 22:30 Ben Walker
2017-01-03 22:56 ` [dpdk-dev] [PATCH v2] " Ben Walker
0 siblings, 1 reply; 9+ messages in thread
From: Ben Walker @ 2017-01-03 22:30 UTC (permalink / raw)
To: dev; +Cc: Ben Walker
For Linux kernel 4.0 and newer, the ability to obtain
physical page frame numbers for unprivileged users from
/proc/self/pagemap was removed. Instead, when an IOMMU
is present, simply choose our own DMA addresses.
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
---
lib/librte_eal/common/eal_private.h | 12 ++++++
lib/librte_eal/linuxapp/eal/eal_memory.c | 71 +++++++++++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
3 files changed, 68 insertions(+), 21 deletions(-)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..8b2d323 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
+#include <stdbool.h>
#include <stdio.h>
#include <rte_pci.h>
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
*/
int rte_eal_hugepage_attach(void);
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Return false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb2..33c66c1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
{
- int fd = open("/proc/self/pagemap", O_RDONLY);
+ uint64_t tmp;
+ phys_addr_t physaddr;
- if (fd < 0) {
+ physaddr = rte_mem_virt2phy(&tmp);
+ if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
- "Cannot open /proc/self/pagemap: %s. "
- "virt2phys address translation will not work\n",
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
strerror(errno));
- return;
+ phys_addrs_available = false;
}
-
- /* Is readable */
- close(fd);
- proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!proc_pagemap_readable)
+ if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_PHYS_ADDR;
+
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned i;
+ phys_addr_t addr = 0;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr;
+ addr += hugepg_tbl[i].size;
+ }
+ return 0;
+}
+
+/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
* two processes mapping data to the same virtual address
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@@ -1043,11 +1061,20 @@ rte_eal_hugepage_init(void)
continue;
}
- /* find physical addresses and sockets for each hugepage */
- if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
+ if (phys_addrs_available) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1316,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1453,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
}
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+ return phys_addrs_available;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 4350134..cfbfedf 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ }
break;
default:
RTE_LOG(DEBUG, EAL,
--
2.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [dpdk-dev] [PATCH v2] eal: Support running as unprivileged user
2017-01-03 22:30 [dpdk-dev] [PATCH] eal: Support running as unprivileged user Ben Walker
@ 2017-01-03 22:56 ` Ben Walker
2017-01-18 17:05 ` Sergio Gonzalez Monroy
2017-01-31 17:40 ` [dpdk-dev] [PATCH v3] " Ben Walker
0 siblings, 2 replies; 9+ messages in thread
From: Ben Walker @ 2017-01-03 22:56 UTC (permalink / raw)
To: dev; +Cc: Ben Walker
For Linux kernel 4.0 and newer, the ability to obtain
physical page frame numbers for unprivileged users from
/proc/self/pagemap was removed. Instead, when an IOMMU
is present, simply choose our own DMA addresses.
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
---
lib/librte_eal/common/eal_private.h | 12 +++++
lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
3 files changed, 71 insertions(+), 22 deletions(-)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..8b2d323 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
+#include <stdbool.h>
#include <stdio.h>
#include <rte_pci.h>
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
*/
int rte_eal_hugepage_attach(void);
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Return false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb2..8678ae9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
{
- int fd = open("/proc/self/pagemap", O_RDONLY);
+ uint64_t tmp;
+ phys_addr_t physaddr;
- if (fd < 0) {
+ physaddr = rte_mem_virt2phy(&tmp);
+ if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
- "Cannot open /proc/self/pagemap: %s. "
- "virt2phys address translation will not work\n",
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
strerror(errno));
- return;
+ phys_addrs_available = false;
}
-
- /* Is readable */
- close(fd);
- proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!proc_pagemap_readable)
+ if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_PHYS_ADDR;
+
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr)
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
- unsigned i;
+ unsigned int i;
phys_addr_t addr;
for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned int i;
+ phys_addr_t addr = 0;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr;
+ addr += hugepg_tbl[i].size;
+ }
+ return 0;
+}
+
+/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
* two processes mapping data to the same virtual address
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void)
continue;
}
- /* find physical addresses and sockets for each hugepage */
- if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
+ if (phys_addrs_available) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1455,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
}
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+ return phys_addrs_available;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 4350134..cfbfedf 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ }
break;
default:
RTE_LOG(DEBUG, EAL,
--
2.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [dpdk-dev] [PATCH v2] eal: Support running as unprivileged user
2017-01-03 22:56 ` [dpdk-dev] [PATCH v2] " Ben Walker
@ 2017-01-18 17:05 ` Sergio Gonzalez Monroy
2017-01-31 17:40 ` [dpdk-dev] [PATCH v3] " Ben Walker
1 sibling, 0 replies; 9+ messages in thread
From: Sergio Gonzalez Monroy @ 2017-01-18 17:05 UTC (permalink / raw)
To: Ben Walker, dev, Jianfeng Tan
On 03/01/2017 22:56, Ben Walker wrote:
> For Linux kernel 4.0 and newer, the ability to obtain
> physical page frame numbers for unprivileged users from
> /proc/self/pagemap was removed. Instead, when an IOMMU
> is present, simply choose our own DMA addresses instead.
>
> Signed-off-by: Ben Walker <benjamin.walker@intel.com>
> @@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
> }
>
> /*
> + * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
> + */
> +static int
> +set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
> +{
> + unsigned int i;
> + phys_addr_t addr = 0;
> +
> + for (i = 0; i < hpi->num_pages[0]; i++) {
> + hugepg_tbl[i].physaddr = addr;
> + addr += hugepg_tbl[i].size;
> + }
> + return 0;
> +}
> +
Sorry for the late catch, but when there are multiple hugepage sizes, both
would be mapped from address 0.
I think making the var static should be enough.
Also I would prefer some randomness on the start address instead of
always 0.
Thanks,
Sergio
^ permalink raw reply [flat|nested] 9+ messages in thread
* [dpdk-dev] [PATCH v3] eal: Support running as unprivileged user
2017-01-03 22:56 ` [dpdk-dev] [PATCH v2] " Ben Walker
2017-01-18 17:05 ` Sergio Gonzalez Monroy
@ 2017-01-31 17:40 ` Ben Walker
2017-01-31 17:44 ` [dpdk-dev] [PATCH v4] " Ben Walker
1 sibling, 1 reply; 9+ messages in thread
From: Ben Walker @ 2017-01-31 17:40 UTC (permalink / raw)
To: dev; +Cc: Ben Walker
For Linux kernel 4.0 and newer, the ability to obtain
physical page frame numbers for unprivileged users from
/proc/self/pagemap was removed. Instead, when an IOMMU
is present, simply choose our own DMA addresses.
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
---
lib/librte_eal/common/eal_private.h | 12 +++++
lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
3 files changed, 71 insertions(+), 22 deletions(-)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..8b2d323 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
+#include <stdbool.h>
#include <stdio.h>
#include <rte_pci.h>
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
*/
int rte_eal_hugepage_attach(void);
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Return false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb2..cbb99bd 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
{
- int fd = open("/proc/self/pagemap", O_RDONLY);
+ uint64_t tmp;
+ phys_addr_t physaddr;
- if (fd < 0) {
+ physaddr = rte_mem_virt2phy(&tmp);
+ if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
- "Cannot open /proc/self/pagemap: %s. "
- "virt2phys address translation will not work\n",
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
strerror(errno));
- return;
+ phys_addrs_available = false;
}
-
- /* Is readable */
- close(fd);
- proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!proc_pagemap_readable)
+ if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_PHYS_ADDR;
+
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr)
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
- unsigned i;
+ unsigned int i;
phys_addr_t addr;
for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned int i;
+ static phys_addr_t addr = 0;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr;
+ addr += hugepg_tbl[i].size;
+ }
+ return 0;
+}
+
+/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
* two processes mapping data to the same virtual address
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void)
continue;
}
- /* find physical addresses and sockets for each hugepage */
- if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
+ if (phys_addrs_available) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1455,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
}
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+ return phys_addrs_available;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index e2fc219..61d55b9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ }
break;
default:
RTE_LOG(DEBUG, EAL,
--
2.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [dpdk-dev] [PATCH v4] eal: Support running as unprivileged user
2017-01-31 17:40 ` [dpdk-dev] [PATCH v3] " Ben Walker
@ 2017-01-31 17:44 ` Ben Walker
2017-02-17 14:59 ` Sergio Gonzalez Monroy
2017-02-17 19:28 ` Stephen Hemminger
0 siblings, 2 replies; 9+ messages in thread
From: Ben Walker @ 2017-01-31 17:44 UTC (permalink / raw)
To: dev; +Cc: Ben Walker
For Linux kernel 4.0 and newer, the ability to obtain
physical page frame numbers for unprivileged users from
/proc/self/pagemap was removed. Instead, when an IOMMU
is present, simply choose our own DMA addresses.
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
---
lib/librte_eal/common/eal_private.h | 12 +++++
lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
3 files changed, 71 insertions(+), 22 deletions(-)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..8b2d323 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
+#include <stdbool.h>
#include <stdio.h>
#include <rte_pci.h>
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
*/
int rte_eal_hugepage_attach(void);
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Return false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb2..657c6f4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
{
- int fd = open("/proc/self/pagemap", O_RDONLY);
+ uint64_t tmp;
+ phys_addr_t physaddr;
- if (fd < 0) {
+ physaddr = rte_mem_virt2phy(&tmp);
+ if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
- "Cannot open /proc/self/pagemap: %s. "
- "virt2phys address translation will not work\n",
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
strerror(errno));
- return;
+ phys_addrs_available = false;
}
-
- /* Is readable */
- close(fd);
- proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!proc_pagemap_readable)
+ if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_PHYS_ADDR;
+
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr)
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
- unsigned i;
+ unsigned int i;
phys_addr_t addr;
for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned int i;
+ static phys_addr_t addr;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr;
+ addr += hugepg_tbl[i].size;
+ }
+ return 0;
+}
+
+/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
* two processes mapping data to the same virtual address
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void)
continue;
}
- /* find physical addresses and sockets for each hugepage */
- if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
+ if (phys_addrs_available) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1455,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
}
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+ return phys_addrs_available;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index e2fc219..61d55b9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ }
break;
default:
RTE_LOG(DEBUG, EAL,
--
2.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [dpdk-dev] [PATCH v4] eal: Support running as unprivileged user
2017-01-31 17:44 ` [dpdk-dev] [PATCH v4] " Ben Walker
@ 2017-02-17 14:59 ` Sergio Gonzalez Monroy
2017-03-09 16:10 ` Thomas Monjalon
2017-02-17 19:28 ` Stephen Hemminger
1 sibling, 1 reply; 9+ messages in thread
From: Sergio Gonzalez Monroy @ 2017-02-17 14:59 UTC (permalink / raw)
To: dev
On 31/01/2017 17:44, Ben Walker wrote:
> For Linux kernel 4.0 and newer, the ability to obtain
> physical page frame numbers for unprivileged users from
> /proc/self/pagemap was removed. Instead, when an IOMMU
> is present, simply choose our own DMA addresses instead.
>
> Signed-off-by: Ben Walker <benjamin.walker@intel.com>
> ---
> lib/librte_eal/common/eal_private.h | 12 +++++
> lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++---------
> lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
> 3 files changed, 71 insertions(+), 22 deletions(-)
Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
PS: Please keep a summary of changes made in each version on future
patch sets (after the triple dash --- )
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [dpdk-dev] [PATCH v4] eal: Support running as unprivileged user
2017-02-17 14:59 ` Sergio Gonzalez Monroy
@ 2017-03-09 16:10 ` Thomas Monjalon
0 siblings, 0 replies; 9+ messages in thread
From: Thomas Monjalon @ 2017-03-09 16:10 UTC (permalink / raw)
To: Ben Walker; +Cc: dev, Sergio Gonzalez Monroy
2017-02-17 14:59, Sergio Gonzalez Monroy:
> On 31/01/2017 17:44, Ben Walker wrote:
> > For Linux kernel 4.0 and newer, the ability to obtain
> > physical page frame numbers for unprivileged users from
> > /proc/self/pagemap was removed. Instead, when an IOMMU
> > is present, simply choose our own DMA addresses instead.
> >
> > Signed-off-by: Ben Walker <benjamin.walker@intel.com>
> > ---
> > lib/librte_eal/common/eal_private.h | 12 +++++
> > lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++---------
> > lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
> > 3 files changed, 71 insertions(+), 22 deletions(-)
>
> Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
Applied, thanks
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [dpdk-dev] [PATCH v4] eal: Support running as unprivileged user
2017-01-31 17:44 ` [dpdk-dev] [PATCH v4] " Ben Walker
2017-02-17 14:59 ` Sergio Gonzalez Monroy
@ 2017-02-17 19:28 ` Stephen Hemminger
2017-03-09 16:11 ` Thomas Monjalon
1 sibling, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2017-02-17 19:28 UTC (permalink / raw)
To: Ben Walker; +Cc: dev
On Tue, 31 Jan 2017 10:44:53 -0700
Ben Walker <benjamin.walker@intel.com> wrote:
> + if (physaddr == RTE_BAD_PHYS_ADDR) {
> RTE_LOG(ERR, EAL,
> - "Cannot open /proc/self/pagemap: %s. "
> - "virt2phys address translation will not work\n",
> + "Cannot obtain physical addresses: %s. "
> + "Only vfio will function.\n",
Please don't split a single error message across multiple lines. It makes
it harder for users to find the source code lines with a simple grep.
Better to just have one long line for format string, or better yet be less wordy.
Yes, the existing DPDK code has lots of these issues.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [dpdk-dev] [PATCH v4] eal: Support running as unprivileged user
2017-02-17 19:28 ` Stephen Hemminger
@ 2017-03-09 16:11 ` Thomas Monjalon
0 siblings, 0 replies; 9+ messages in thread
From: Thomas Monjalon @ 2017-03-09 16:11 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev, Ben Walker
2017-02-17 11:28, Stephen Hemminger:
> On Tue, 31 Jan 2017 10:44:53 -0700
> Ben Walker <benjamin.walker@intel.com> wrote:
>
> > + if (physaddr == RTE_BAD_PHYS_ADDR) {
> > RTE_LOG(ERR, EAL,
> > - "Cannot open /proc/self/pagemap: %s. "
> > - "virt2phys address translation will not work\n",
> > + "Cannot obtain physical addresses: %s. "
> > + "Only vfio will function.\n",
>
> Please don't split a single error message across multiple lines. It makes
> it harder for user to find the source code lines with simple grep.
> Better to just have one long line for format string, or better yet be less wordy.
>
> Yes, the existing DPDK code has lots of these issues.
You're right.
Here the split is acceptable as there is strerror(errno) in the middle.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2017-03-09 16:11 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-03 22:30 [dpdk-dev] [PATCH] eal: Support running as unprivileged user Ben Walker
2017-01-03 22:56 ` [dpdk-dev] [PATCH v2] " Ben Walker
2017-01-18 17:05 ` Sergio Gonzalez Monroy
2017-01-31 17:40 ` [dpdk-dev] [PATCH v3] " Ben Walker
2017-01-31 17:44 ` [dpdk-dev] [PATCH v4] " Ben Walker
2017-02-17 14:59 ` Sergio Gonzalez Monroy
2017-03-09 16:10 ` Thomas Monjalon
2017-02-17 19:28 ` Stephen Hemminger
2017-03-09 16:11 ` Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).