From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id 5D10C4F98 for ; Sat, 3 Mar 2018 14:46:36 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 03 Mar 2018 05:46:35 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.47,418,1515484800"; d="scan'208";a="208546140" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga005.fm.intel.com with ESMTP; 03 Mar 2018 05:46:32 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w23DkVjJ012178; Sat, 3 Mar 2018 13:46:31 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w23DkVff023741; Sat, 3 Mar 2018 13:46:31 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w23DkVGF023737; Sat, 3 Mar 2018 13:46:31 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com, bruce.richardson@intel.com, thomas@monjalon.net, konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com, nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com, olivier.matz@6wind.com Date: Sat, 3 Mar 2018 13:46:00 +0000 Message-Id: <6a44741c991bd91ac7c8fd649571e731bec78afe.1520083504.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 12/41] eal: read hugepage counts from node-specific sysfs path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 03 Mar 2018 13:46:37 -0000 For non-legacy memory init mode, instead of looking at generic sysfs path, look at sysfs paths pertaining to each NUMA node for hugepage counts. Note that per-NUMA node path does not provide information regarding reserved pages, so we might not get the best info from these paths, but this saves us from the whole mapping/remapping business before we're actually able to tell which page is on which socket, because we no longer require our memory to be physically contiguous. Legacy memory init will not use this. Signed-off-by: Anatoly Burakov --- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 79 +++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 8bbf771..706b6d5 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -30,6 +30,7 @@ #include "eal_filesystem.h" static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; +static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node"; /* this function is only called from eal_hugepage_info_init which itself * is only called from a primary process */ @@ -70,6 +71,45 @@ get_num_hugepages(const char *subdir) return num_pages; } +static uint32_t +get_num_hugepages_on_node(const char *subdir, unsigned int socket) +{ + char path[PATH_MAX], socketpath[PATH_MAX]; + DIR *socketdir; + unsigned long num_pages = 0; + const char *nr_hp_file = "free_hugepages"; + + snprintf(socketpath, sizeof(socketpath), "%s/node%u/hugepages", + sys_pages_numa_dir_path, socket); + + socketdir = opendir(socketpath); + if (socketdir) { + /* Keep calm and carry on */ + closedir(socketdir); + } else { + /* Can't find socket dir, so ignore it */ + return 0; + } + + snprintf(path, sizeof(path), "%s/%s/%s", + socketpath, subdir, nr_hp_file); + if (eal_parse_sysfs_value(path, &num_pages) < 0) + return 0; + + if (num_pages == 0) + RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", + subdir); + + /* + * we want to return a uint32_t and more than this looks suspicious + * anyway ... + */ + if (num_pages > UINT32_MAX) + num_pages = UINT32_MAX; + + return num_pages; +} + static uint64_t get_default_hp_size(void) { @@ -248,7 +288,7 @@ eal_hugepage_info_init(void) { const char dirent_start_text[] = "hugepages-"; const size_t dirent_start_len = sizeof(dirent_start_text) - 1; - unsigned i, num_sizes = 0; + unsigned int i, total_pages, num_sizes = 0; DIR *dir; struct dirent *dirent; @@ -302,9 +342,27 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; - /* for now, put all pages into socket 0, - * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + /* + * first, try to put all hugepages into relevant sockets, but + * if first attempts fails, fall back to collecting all pages + * in one socket and sorting them later + */ + total_pages = 0; + /* we also don't want to do this for legacy init */ + if (!internal_config.legacy_mem) + for (i = 0; i < rte_num_sockets(); i++) { + unsigned int num_pages = + get_num_hugepages_on_node( + dirent->d_name, i); + hpi->num_pages[i] = num_pages; + total_pages += num_pages; + } + /* + * we failed to sort memory from the get go, so fall + * back to old way + */ + if (total_pages == 0) + hpi->num_pages[0] = get_num_hugepages(dirent->d_name); #ifndef RTE_ARCH_64 /* for 32-bit systems, limit number of hugepages to @@ -328,10 +386,19 @@ eal_hugepage_info_init(void) sizeof(internal_config.hugepage_info[0]), compare_hpi); /* now we have all info, check we have at least one valid size */ - for (i = 0; i < num_sizes; i++) + for (i = 0; i < num_sizes; i++) { + /* pages may no longer all be on socket 0, so check all */ + unsigned int j, num_pages = 0; + + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + struct hugepage_info *hpi = + &internal_config.hugepage_info[i]; + num_pages += hpi->num_pages[j]; + } if (internal_config.hugepage_info[i].hugedir != NULL && - internal_config.hugepage_info[i].num_pages[0] > 0) + num_pages > 0) return 0; + } /* no valid hugepage mounts available, return error */ return -1; -- 2.7.4