Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570..4a9cc1f19 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553..31eae2278 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index 9b9a0577a..1bfdb52fb 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -22,6 +22,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 1c089a1ef..8516f0d35 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/freebsd/eal/eal.c | 27 +++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 30 ++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..fb43c00fd 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -219,7 +219,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -236,7 +237,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -254,15 +255,29 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len, + sysconf(_SC_PAGESIZE), EAL_VIRTUAL_AREA_ADDR_IS_HINT, + 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 34db78753..ed0ebcc8e 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -305,7 +305,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -330,7 +331,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -348,13 +349,26 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, &cfg_len, + sysconf(_SC_PAGESIZE), EAL_VIRTUAL_AREA_ADDR_IS_HINT, + 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570..4a9cc1f19 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553..31eae2278 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index 9b9a0577a..1bfdb52fb 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -22,6 +22,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 1c089a1ef..8516f0d35 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/freebsd/eal/eal.c | 27 +++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 30 ++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..fb43c00fd 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -219,7 +219,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -236,7 +237,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -254,15 +255,29 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len, + sysconf(_SC_PAGESIZE), EAL_VIRTUAL_AREA_ADDR_IS_HINT, + 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 34db78753..ed0ebcc8e 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -305,7 +305,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -330,7 +331,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -348,13 +349,26 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, &cfg_len, + sysconf(_SC_PAGESIZE), EAL_VIRTUAL_AREA_ADDR_IS_HINT, + 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570..4a9cc1f19 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553..31eae2278 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index 9b9a0577a..1bfdb52fb 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -22,6 +22,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 1c089a1ef..8516f0d35 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/freebsd/eal/eal.c | 27 +++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 30 ++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..fb43c00fd 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -219,7 +219,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -236,7 +237,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -254,15 +255,29 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len, + sysconf(_SC_PAGESIZE), EAL_VIRTUAL_AREA_ADDR_IS_HINT, + 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 34db78753..ed0ebcc8e 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -305,7 +305,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -330,7 +331,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -348,13 +349,26 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, &cfg_len, + sysconf(_SC_PAGESIZE), EAL_VIRTUAL_AREA_ADDR_IS_HINT, + 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
On 29-Jul-19 12:18 PM, Anatoly Burakov wrote:
> Currently, mem config will be mapped without using the virtual
> area reservation infrastructure, which means it will be mapped
> at an arbitrary location. This may cause failures to map the
> shared config in secondary process due to things like PCI
> whitelist arguments allocating memory in a space where the
> primary has allocated the shared mem config.
>
> Fix this by using virtual area reservation to reserve space for
> the mem config, thereby avoiding the problem and reserving the
> shared config (hopefully) far away from any normal memory
> allocations.
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
Apologies for multiple sends, the email appeared to not be getting
through...
--
Thanks,
Anatoly
Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570..4a9cc1f19 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553..31eae2278 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index 9b9a0577a..1bfdb52fb 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -22,6 +22,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 1c089a1ef..8516f0d35 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- Notes: v2: - Fix issue with unneeded ADDR_IS_HINT flag that broke things on 32-bit builds lib/librte_eal/freebsd/eal/eal.c | 26 ++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 29 +++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..5d2e147b1 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -219,7 +219,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -236,7 +237,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -254,15 +255,28 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len, + sysconf(_SC_PAGESIZE), 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 34db78753..bfd3bc2da 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -305,7 +305,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -330,7 +331,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -348,13 +349,25 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, &cfg_len, + sysconf(_SC_PAGESIZE), 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570..4a9cc1f19 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553..31eae2278 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index 9b9a0577a..1bfdb52fb 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -22,6 +22,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 1c089a1ef..8516f0d35 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- Notes: v3: - Fix alignment issues with base address v2: - Fix issue with unneeded ADDR_IS_HINT flag that broke things on 32-bit builds lib/librte_eal/freebsd/eal/eal.c | 26 ++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 29 +++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..5d2e147b1 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -219,7 +219,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -236,7 +237,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -254,15 +255,28 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len, + sysconf(_SC_PAGESIZE), 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 34db78753..bfd3bc2da 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -305,7 +305,8 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t cfg_len = sizeof(*rte_config.mem_config); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -330,7 +331,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -348,13 +349,25 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, &cfg_len, + sysconf(_SC_PAGESIZE), 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570..4a9cc1f19 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553..31eae2278 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index 9b9a0577a..1bfdb52fb 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -22,6 +22,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 1c089a1ef..8516f0d35 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- Notes: v4: - Fix mem config length to always be page-aligned v3: - Fix alignment issues with base address v2: - Fix issue with unneeded ADDR_IS_HINT flag that broke things on 32-bit builds lib/librte_eal/freebsd/eal/eal.c | 28 +++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 33 +++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..ce3c5ed2d 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -219,7 +219,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t page_sz = sysconf(_SC_PAGE_SIZE); + size_t cfg_len = sizeof(*rte_config.mem_config); + size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -236,7 +239,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -254,15 +257,28 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len_aligned, page_sz, + 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len_aligned, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 34db78753..55a3bb971 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -305,7 +305,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t page_sz = sysconf(_SC_PAGE_SIZE); + size_t cfg_len = sizeof(*rte_config.mem_config); + size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -317,7 +320,7 @@ rte_eal_config_create(void) if (internal_config.base_virtaddr != 0) rte_mem_cfg_addr = (void *) RTE_ALIGN_FLOOR(internal_config.base_virtaddr - - sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + sizeof(struct rte_mem_config), page_sz); else rte_mem_cfg_addr = NULL; @@ -330,7 +333,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -348,13 +351,25 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, + &cfg_len_aligned, page_sz, 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len_aligned, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
On Wed, Jul 31, 2019 at 2:47 PM Anatoly Burakov <anatoly.burakov@intel.com> wrote: > > Not all OS's follow Linux's memory layout, which may lead to > problems following the suggested common address hint absent > of a base-virtaddr flag. Make this address hint OS-specific. > > Cc: stable@dpdk.org Missing Fixes: ? > > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> > --- > lib/librte_eal/common/eal_common_memory.c | 19 +------------------ > lib/librte_eal/common/eal_private.h | 6 ++++++ > lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ > lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ > 4 files changed, 37 insertions(+), 18 deletions(-) > > diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c > index 19ea47570..4a9cc1f19 100644 > --- a/lib/librte_eal/common/eal_common_memory.c > +++ b/lib/librte_eal/common/eal_common_memory.c > @@ -40,23 +40,6 @@ > static void *next_baseaddr; > static uint64_t system_page_sz; > > -#ifdef RTE_ARCH_64 > -/* > - * Linux kernel uses a really high address as starting address for serving > - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, > - * this starting address is likely too high for those devices. However, it > - * is possible to use a lower address in the process virtual address space > - * as with 64 bits there is a lot of available space. > - * > - * Current known limitations are 39 or 40 bits. Setting the starting address > - * at 4GB implies there are 508GB or 1020GB for mapping the available > - * hugepages. This is likely enough for most systems, although a device with > - * addressing limitations should call rte_mem_check_dma_mask for ensuring all > - * memory is within supported range. > - */ > -static uint64_t baseaddr = 0x100000000; > -#endif > - > #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 > void * > eal_get_virtual_area(void *requested_addr, size_t *size, > @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, > #ifdef RTE_ARCH_64 > if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && > rte_eal_process_type() == RTE_PROC_PRIMARY) > - next_baseaddr = (void *) baseaddr; > + next_baseaddr = (void *) eal_get_baseaddr(); > #endif > if (requested_addr == NULL && next_baseaddr != NULL) { > requested_addr = next_baseaddr; > diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h > index 798ede553..31eae2278 100644 > --- a/lib/librte_eal/common/eal_private.h > +++ b/lib/librte_eal/common/eal_private.h > @@ -381,4 +381,10 @@ rte_option_init(void); > void > rte_option_usage(void); > > +/** > + * Get OS-specific EAL mapping base address. > + */ > +uint64_t > +eal_get_baseaddr(void); > + > #endif /* _EAL_PRIVATE_H_ */ > diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c > index 9b9a0577a..1bfdb52fb 100644 > --- a/lib/librte_eal/freebsd/eal/eal_memory.c > +++ b/lib/librte_eal/freebsd/eal/eal_memory.c > @@ -22,6 +22,16 @@ > > #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) > > +uint64_t eal_get_baseaddr(void) > +{ > + /* > + * FreeBSD may allocate something in the space we will be mapping things > + * before we get a chance to do that, so use a base address that's far > + * away from where malloc() et al usually map things. > + */ > + return 0x1000000000; > +} > + > /* > * Get physical address of any mapped virtual address in the current process. > */ > diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c > index 1c089a1ef..8516f0d35 100644 > --- a/lib/librte_eal/linux/eal/eal_memory.c > +++ b/lib/librte_eal/linux/eal/eal_memory.c > @@ -70,6 +70,26 @@ static int phys_addrs_available = -1; > > #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" > > +uint64_t eal_get_baseaddr(void) > +{ > + /* > + * Linux kernel uses a really high address as starting address for > + * serving mmaps calls. If there exists addressing limitations and IOVA > + * mode is VA, this starting address is likely too high for those > + * devices. However, it is possible to use a lower address in the > + * process virtual address space as with 64 bits there is a lot of > + * available space. > + * > + * Current known limitations are 39 or 40 bits. Setting the starting > + * address at 4GB implies there are 508GB or 1020GB for mapping the > + * available hugepages. This is likely enough for most systems, although > + * a device with addressing limitations should call > + * rte_mem_check_dma_mask for ensuring all memory is within supported > + * range. > + */ > + return 0x100000000; > +} > + > /* > * Get physical address of any mapped virtual address in the current process. > */ > -- > 2.17.1 What about windows port? -- David Marchand
On Wed, 2 Oct 2019 13:41:30 +0200
David Marchand <david.marchand@redhat.com> wrote:
> > +uint64_t eal_get_baseaddr(void)
> > +{
> > + /*
> > + * Linux kernel uses a really high address as starting address for
> > + * serving mmaps calls. If there exists addressing limitations and IOVA
> > + * mode is VA, this starting address is likely too high for those
> > + * devices. However, it is possible to use a lower address in the
> > + * process virtual address space as with 64 bits there is a lot of
> > + * available space.
> > + *
> > + * Current known limitations are 39 or 40 bits. Setting the starting
> > + * address at 4GB implies there are 508GB or 1020GB for mapping the
> > + * available hugepages. This is likely enough for most systems, although
> > + * a device with addressing limitations should call
> > + * rte_mem_check_dma_mask for ensuring all memory is within supported
> > + * range.
> > + */
> > + return 0x100000000;
Is this going to work right on 32 bit builds where sizeof(uint) == 4
then constants default to 32. Does it need ul or ull suffix (or a cast)?
On 02-Oct-19 12:41 PM, David Marchand wrote: > On Wed, Jul 31, 2019 at 2:47 PM Anatoly Burakov > <anatoly.burakov@intel.com> wrote: >> >> Not all OS's follow Linux's memory layout, which may lead to >> problems following the suggested common address hint absent >> of a base-virtaddr flag. Make this address hint OS-specific. >> >> Cc: stable@dpdk.org > > Missing Fixes: ? No, not really. This isn't a bug as such - it was intended behavior. It's just /wrong/ intended behavior :) > > What about windows port? > > I don't see it supporting secondary processes or mapping shared config anywhere. So probably unimplemented for now. -- Thanks, Anatoly
On 02-Oct-19 3:42 PM, Stephen Hemminger wrote:
> On Wed, 2 Oct 2019 13:41:30 +0200
> David Marchand <david.marchand@redhat.com> wrote:
>
>>> +uint64_t eal_get_baseaddr(void)
>>> +{
>>> + /*
>>> + * Linux kernel uses a really high address as starting address for
>>> + * serving mmaps calls. If there exists addressing limitations and IOVA
>>> + * mode is VA, this starting address is likely too high for those
>>> + * devices. However, it is possible to use a lower address in the
>>> + * process virtual address space as with 64 bits there is a lot of
>>> + * available space.
>>> + *
>>> + * Current known limitations are 39 or 40 bits. Setting the starting
>>> + * address at 4GB implies there are 508GB or 1020GB for mapping the
>>> + * available hugepages. This is likely enough for most systems, although
>>> + * a device with addressing limitations should call
>>> + * rte_mem_check_dma_mask for ensuring all memory is within supported
>>> + * range.
>>> + */
>>> + return 0x100000000;
>
> Is this going to work right on 32 bit builds where sizeof(uint) == 4
> then constants default to 32. Does it need ul or ull suffix (or a cast)?
>
Adding ULL in v2, thanks.
--
Thanks,
Anatoly
Not all OS's follow Linux's memory layout, which may lead to problems following the suggested common address hint absent of a base-virtaddr flag. Make this address hint OS-specific. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- Notes: v5: - Add ULL suffix as per Stephen's comment lib/librte_eal/common/eal_common_memory.c | 19 +------------------ lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/freebsd/eal/eal_memory.c | 10 ++++++++++ lib/librte_eal/linux/eal/eal_memory.c | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 19ea47570b..4a9cc1f19a 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -40,23 +40,6 @@ static void *next_baseaddr; static uint64_t system_page_sz; -#ifdef RTE_ARCH_64 -/* - * Linux kernel uses a really high address as starting address for serving - * mmaps calls. If there exists addressing limitations and IOVA mode is VA, - * this starting address is likely too high for those devices. However, it - * is possible to use a lower address in the process virtual address space - * as with 64 bits there is a lot of available space. - * - * Current known limitations are 39 or 40 bits. Setting the starting address - * at 4GB implies there are 508GB or 1020GB for mapping the available - * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_mem_check_dma_mask for ensuring all - * memory is within supported range. - */ -static uint64_t baseaddr = 0x100000000; -#endif - #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, #ifdef RTE_ARCH_64 if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && rte_eal_process_type() == RTE_PROC_PRIMARY) - next_baseaddr = (void *) baseaddr; + next_baseaddr = (void *) eal_get_baseaddr(); #endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553b..31eae22787 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -381,4 +381,10 @@ rte_option_init(void); void rte_option_usage(void); +/** + * Get OS-specific EAL mapping base address. + */ +uint64_t +eal_get_baseaddr(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c index cd31827c2b..98d65fde0a 100644 --- a/lib/librte_eal/freebsd/eal/eal_memory.c +++ b/lib/librte_eal/freebsd/eal/eal_memory.c @@ -21,6 +21,16 @@ #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) +uint64_t eal_get_baseaddr(void) +{ + /* + * FreeBSD may allocate something in the space we will be mapping things + * before we get a chance to do that, so use a base address that's far + * away from where malloc() et al usually map things. + */ + return 0x1000000000ULL; +} + /* * Get physical address of any mapped virtual address in the current process. */ diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c index 8f62c343d6..fa598b21a4 100644 --- a/lib/librte_eal/linux/eal/eal_memory.c +++ b/lib/librte_eal/linux/eal/eal_memory.c @@ -69,6 +69,26 @@ static int phys_addrs_available = -1; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +uint64_t eal_get_baseaddr(void) +{ + /* + * Linux kernel uses a really high address as starting address for + * serving mmaps calls. If there exists addressing limitations and IOVA + * mode is VA, this starting address is likely too high for those + * devices. However, it is possible to use a lower address in the + * process virtual address space as with 64 bits there is a lot of + * available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting + * address at 4GB implies there are 508GB or 1020GB for mapping the + * available hugepages. This is likely enough for most systems, although + * a device with addressing limitations should call + * rte_mem_check_dma_mask for ensuring all memory is within supported + * range. + */ + return 0x100000000ULL; +} + /* * Get physical address of any mapped virtual address in the current process. */ -- 2.17.1
Currently, mem config will be mapped without using the virtual area reservation infrastructure, which means it will be mapped at an arbitrary location. This may cause failures to map the shared config in secondary process due to things like PCI whitelist arguments allocating memory in a space where the primary has allocated the shared mem config. Fix this by using virtual area reservation to reserve space for the mem config, thereby avoiding the problem and reserving the shared config (hopefully) far away from any normal memory allocations. Cc: stable@dpdk.org Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> --- Notes: v4: - Fix mem config length to always be page-aligned v3: - Fix alignment issues with base address v2: - Fix issue with unneeded ADDR_IS_HINT flag that broke things on 32-bit builds lib/librte_eal/freebsd/eal/eal.c | 28 +++++++++++++++++++++------ lib/librte_eal/linux/eal/eal.c | 33 +++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index f86e9aa318..5b869b895a 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -218,7 +218,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t page_sz = sysconf(_SC_PAGE_SIZE); + size_t cfg_len = sizeof(*rte_config.mem_config); + size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -235,7 +238,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -253,15 +256,28 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len_aligned, page_sz, + 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); close(mem_cfg_fd); mem_cfg_fd = -1; return -1; } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len_aligned, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = rte_mem_cfg_addr; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index f39720637f..6488171010 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -306,7 +306,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) static int rte_eal_config_create(void) { - void *rte_mem_cfg_addr; + size_t page_sz = sysconf(_SC_PAGE_SIZE); + size_t cfg_len = sizeof(*rte_config.mem_config); + size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; int retval; const char *pathname = eal_runtime_config_path(); @@ -318,7 +321,7 @@ rte_eal_config_create(void) if (internal_config.base_virtaddr != 0) rte_mem_cfg_addr = (void *) RTE_ALIGN_FLOOR(internal_config.base_virtaddr - - sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + sizeof(struct rte_mem_config), page_sz); else rte_mem_cfg_addr = NULL; @@ -331,7 +334,7 @@ rte_eal_config_create(void) } } - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + retval = ftruncate(mem_cfg_fd, cfg_len); if (retval < 0){ close(mem_cfg_fd); mem_cfg_fd = -1; @@ -349,13 +352,25 @@ rte_eal_config_create(void) return -1; } - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - close(mem_cfg_fd); - mem_cfg_fd = -1; + /* reserve space for config */ + rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, + &cfg_len_aligned, page_sz, 0, 0); + if (rte_mem_cfg_addr == NULL) { RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); + close(mem_cfg_fd); + mem_cfg_fd = -1; + return -1; + } + + /* remap the actual file into the space we've just reserved */ + mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, + cfg_len_aligned, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); + if (mapped_mem_cfg_addr == MAP_FAILED) { + munmap(rte_mem_cfg_addr, cfg_len); + close(mem_cfg_fd); + mem_cfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); return -1; } -- 2.17.1
On Thu, Oct 24, 2019 at 2:37 PM Anatoly Burakov <anatoly.burakov@intel.com> wrote: > > Currently, mem config will be mapped without using the virtual > area reservation infrastructure, which means it will be mapped > at an arbitrary location. This may cause failures to map the > shared config in secondary process due to things like PCI > whitelist arguments allocating memory in a space where the > primary has allocated the shared mem config. > > Fix this by using virtual area reservation to reserve space for > the mem config, thereby avoiding the problem and reserving the > shared config (hopefully) far away from any normal memory > allocations. > > Cc: stable@dpdk.org > > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> > --- > > Notes: > v4: > - Fix mem config length to always be page-aligned > > v3: > - Fix alignment issues with base address > > v2: > - Fix issue with unneeded ADDR_IS_HINT flag that broke things > on 32-bit builds > > lib/librte_eal/freebsd/eal/eal.c | 28 +++++++++++++++++++++------ > lib/librte_eal/linux/eal/eal.c | 33 +++++++++++++++++++++++--------- > 2 files changed, 46 insertions(+), 15 deletions(-) > > diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c > index f86e9aa318..5b869b895a 100644 > --- a/lib/librte_eal/freebsd/eal/eal.c > +++ b/lib/librte_eal/freebsd/eal/eal.c > @@ -218,7 +218,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) > static int > rte_eal_config_create(void) > { > - void *rte_mem_cfg_addr; > + size_t page_sz = sysconf(_SC_PAGE_SIZE); > + size_t cfg_len = sizeof(*rte_config.mem_config); > + size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); > + void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; > int retval; > > const char *pathname = eal_runtime_config_path(); Updated existing call to sysconf with page_sz variable. if (internal_config.base_virtaddr != 0) rte_mem_cfg_addr = (void *) RTE_ALIGN_FLOOR(internal_config.base_virtaddr - - sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + sizeof(struct rte_mem_config), page_sz); else rte_mem_cfg_addr = NULL; > @@ -235,7 +238,7 @@ rte_eal_config_create(void) > } > } > > - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); > + retval = ftruncate(mem_cfg_fd, cfg_len); > if (retval < 0){ > close(mem_cfg_fd); > mem_cfg_fd = -1; > @@ -253,15 +256,28 @@ rte_eal_config_create(void) > return -1; > } > > - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), > - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); > - > - if (rte_mem_cfg_addr == MAP_FAILED){ > + /* reserve space for config */ > + rte_mem_cfg_addr = eal_get_virtual_area(NULL, &cfg_len_aligned, page_sz, > + 0, 0); Fixed conflict with base-virtaddr patches. > + if (rte_mem_cfg_addr == NULL) { > RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); > close(mem_cfg_fd); > mem_cfg_fd = -1; > return -1; > } Applied, thanks. -- David Marchand
On Thu, Oct 24, 2019 at 2:37 PM Anatoly Burakov
<anatoly.burakov@intel.com> wrote:
>
> Not all OS's follow Linux's memory layout, which may lead to
> problems following the suggested common address hint absent
> of a base-virtaddr flag. Make this address hint OS-specific.
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Applied, thanks.
--
David Marchand