From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id 8FBA25F2C for ; Wed, 7 Mar 2018 17:57:18 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga007.jf.intel.com ([10.7.209.58]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 07 Mar 2018 08:57:18 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.47,436,1515484800"; d="scan'208";a="22774204" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga007.jf.intel.com with ESMTP; 07 Mar 2018 08:57:14 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w27GvDgR032425; Wed, 7 Mar 2018 16:57:13 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w27GvD3k006811; Wed, 7 Mar 2018 16:57:13 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w27GvDPR006807; Wed, 7 Mar 2018 16:57:13 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: Bruce Richardson , keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com, thomas@monjalon.net, konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com, nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com, olivier.matz@6wind.com Date: Wed, 7 Mar 2018 16:56:55 +0000 Message-Id: <9b2d18cf76999c030bcf761b2aaff48b1b8887f1.1520428025.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH v2 27/41] eal: add multiprocess init with memory hotplug X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: 
List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 07 Mar 2018 16:57:19 -0000 For legacy memory mode, attach to the primary's memseg list and map hugepages as before. For non-legacy mode, preallocate all VA space and then synchronize the local memory map with the primary process. Signed-off-by: Anatoly Burakov --- lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 7 ++ lib/librte_eal/common/eal_common_memory.c | 99 +++++++++++++++++++++---- lib/librte_eal/common/eal_hugepages.h | 5 ++ lib/librte_eal/linuxapp/eal/eal.c | 18 +++-- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 53 ++++++++----- lib/librte_eal/linuxapp/eal/eal_memory.c | 24 ++++-- 6 files changed, 159 insertions(+), 47 deletions(-) diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c index be2dbf0..18e6e5e 100644 --- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -103,3 +103,10 @@ eal_hugepage_info_init(void) return 0; } + +/* memory hotplug is not supported in FreeBSD, so no need to implement this */ +int +eal_hugepage_info_read(void) +{ + return 0; +} diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 457e239..a571e24 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -20,6 +20,7 @@ #include #include +#include "eal_memalloc.h" #include "eal_private.h" #include "eal_internal_cfg.h" @@ -147,19 +148,11 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, char name[RTE_FBARRAY_NAME_LEN]; int max_pages; uint64_t mem_amount; - void *addr; if (!internal_config.legacy_mem) { mem_amount = get_mem_amount(page_sz); max_pages = mem_amount / page_sz; - - addr = eal_get_virtual_area(NULL, &mem_amount, page_sz, 0, 0); - if (addr == NULL) { - RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); - return -1; - } } else { - addr = NULL; /* numer of memsegs in each list, these will not be single-page * 
segments, so RTE_MAX_LEGACY_MEMSEG is like old default. */ @@ -177,7 +170,7 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, msl->hugepage_sz = page_sz; msl->socket_id = socket_id; - msl->base_va = addr; + msl->base_va = NULL; RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n", page_sz >> 10, socket_id); @@ -186,16 +179,46 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, } static int -memseg_init(void) +alloc_va_space(struct rte_memseg_list *msl) +{ + uint64_t mem_sz, page_sz; + void *addr; + int flags = 0; + +#ifdef RTE_ARCH_PPC_64 + flags |= MAP_HUGETLB; +#endif + + page_sz = msl->hugepage_sz; + mem_sz = page_sz * msl->memseg_arr.len; + + addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags); + if (addr == NULL) { + if (rte_errno == EADDRNOTAVAIL) + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n", + (unsigned long long)mem_sz, msl->base_va); + else + RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); + return -1; + } + msl->base_va = addr; + + return 0; +} + + +static int +memseg_primary_init(void) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; int socket_id, hpi_idx, msl_idx = 0; struct rte_memseg_list *msl; - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - RTE_LOG(ERR, EAL, "Secondary process not supported\n"); - return -1; - } + /* if we start allocating memory segments for pages straight away, VA + * space will become fragmented, reducing chances of success when + * secondary process maps the same addresses. to fix this, allocate + * fbarrays first, and then allocate VA space for them. 
+ */ /* create memseg lists */ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; @@ -235,12 +258,55 @@ memseg_init(void) total_segs += msl->memseg_arr.len; total_mem = total_segs * msl->hugepage_sz; type_msl_idx++; + + /* no need to preallocate VA in legacy mode */ + if (internal_config.legacy_mem) + continue; + + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); + return -1; + } } } } return 0; } +static int +memseg_secondary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int msl_idx = 0; + struct rte_memseg_list *msl; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + + msl = &mcfg->memsegs[msl_idx]; + + /* skip empty memseg lists */ + if (msl->memseg_arr.len == 0) + continue; + + if (rte_fbarray_attach(&msl->memseg_arr)) { + RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); + return -1; + } + + /* no need to preallocate VA space in legacy mode */ + if (internal_config.legacy_mem) + continue; + + /* preallocate VA space */ + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); + return -1; + } + } + + return 0; +} + static struct rte_memseg * virt2memseg(const void *addr, const struct rte_memseg_list *msl) { @@ -480,7 +546,10 @@ rte_eal_memory_init(void) int retval; RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n"); - retval = memseg_init(); + retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? + memseg_primary_init() : + memseg_secondary_init(); + if (retval < 0) return -1; diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h index f963ae5..38d0b04 100644 --- a/lib/librte_eal/common/eal_hugepages.h +++ b/lib/librte_eal/common/eal_hugepages.h @@ -34,4 +34,9 @@ struct hugepage_file { */ int eal_hugepage_info_init(void); +/** + * Read information about hugepages on Linux, but don't clear them out. 
+ */ +int eal_hugepage_info_read(void); + #endif /* EAL_HUGEPAGES_H */ diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index d336c96..7a0d742 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -805,13 +805,17 @@ rte_eal_init(int argc, char **argv) "KNI module inserted\n"); } - if (internal_config.no_hugetlbfs == 0 && - internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) { - rte_eal_init_alert("Cannot get hugepage information."); - rte_errno = EACCES; - rte_atomic32_clear(&run_once); - return -1; + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? + eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 7e2475f..7a4adce 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -299,15 +300,9 @@ compare_hpi(const void *a, const void *b) return hpi_b->hugepage_sz - hpi_a->hugepage_sz; } -/* - * when we initialize the hugepage info, everything goes - * to socket 0 by default. it will later get sorted by memory - * initialization procedure. 
- */ -int -eal_hugepage_info_init(void) -{ - const char dirent_start_text[] = "hugepages-"; +static int +hugepage_info_init(bool clear_hugepages) +{ const char dirent_start_text[] = "hugepages-"; const size_t dirent_start_len = sizeof(dirent_start_text) - 1; unsigned int i, total_pages, num_sizes = 0; DIR *dir; @@ -350,18 +345,20 @@ eal_hugepage_info_init(void) continue; } - /* try to obtain a writelock */ - hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); + if (clear_hugepages) { + /* try to obtain a writelock */ + hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); - /* if blocking lock failed */ - if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { - RTE_LOG(CRIT, EAL, - "Failed to lock hugepage directory!\n"); - break; + /* if blocking lock failed */ + if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { + RTE_LOG(CRIT, EAL, + "Failed to lock hugepage directory!\n"); + break; + } + /* clear out the hugepages dir from unused pages */ + if (clear_hugedir(hpi->hugedir) == -1) + break; } - /* clear out the hugepages dir from unused pages */ - if (clear_hugedir(hpi->hugedir) == -1) - break; /* * first, try to put all hugepages into relevant sockets, but @@ -417,10 +414,26 @@ eal_hugepage_info_init(void) num_pages += hpi->num_pages[j]; } if (internal_config.hugepage_info[i].hugedir != NULL && - num_pages > 0) + (num_pages > 0 || !clear_hugepages)) return 0; } /* no valid hugepage mounts available, return error */ return -1; } + +int eal_hugepage_info_read(void) +{ + return hugepage_info_init(false); +} + +/* + * when we initialize the hugepage info, everything goes + * to socket 0 by default. it will later get sorted by memory + * initialization procedure. 
+ */ +int +eal_hugepage_info_init(void) +{ + return hugepage_info_init(true); +} diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index e0b4988..f74291f 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1569,6 +1569,22 @@ eal_legacy_hugepage_attach(void) return -1; } +static int +eal_hugepage_attach(void) +{ + if (eal_memalloc_sync_with_primary()) { + RTE_LOG(ERR, EAL, "Could not map memory from primary process\n"); + if (aslr_enabled() > 0) { + RTE_LOG(ERR, EAL, "It is recommended to " + "disable ASLR in the kernel " + "and retry running both primary " + "and secondary processes\n"); + } + return -1; + } + return 0; +} + int rte_eal_hugepage_init(void) { @@ -1580,11 +1596,9 @@ rte_eal_hugepage_init(void) int rte_eal_hugepage_attach(void) { - if (internal_config.legacy_mem) - return eal_legacy_hugepage_attach(); - else - RTE_LOG(ERR, EAL, "Secondary processes aren't supported yet\n"); - return -1; + return internal_config.legacy_mem ? + eal_legacy_hugepage_attach() : + eal_hugepage_attach(); } int -- 2.7.4