From: Gregory Etelson <gregory@weka.io>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH] eal: use get_mempolicy(2) to find numa socket on Linux
Date: Sun, 18 Jun 2017 18:52:02 +0300 [thread overview]
Message-ID: <25388622.XuEPg0MkIL@polaris> (raw)
Use get_mempolicy(2) to find numa socket on Linux
instead of parsing /proc/self/numa_maps.
When process maps around 1K hugepages
numa_maps file can miss huge records in older Linux kernels.
get_mempolicy() proved more reliable
Requires numactl dev package
Signed-off-by: Gregory Etelson <gregory@weka.io>
---
lib/librte_eal/linuxapp/eal/eal_memory.c | 85 ++++----------------------------
mk/exec-env/linuxapp/rte.vars.mk | 1 +
mk/internal/rte.build-pre.mk | 1 +
3 files changed, 11 insertions(+), 76 deletions(-)
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index e17c9cb..48b71bc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -54,6 +54,7 @@
#include <sys/time.h>
#include <signal.h>
#include <setjmp.h>
+#include <numaif.h>
#include <rte_log.h>
#include <rte_memory.h>
@@ -495,86 +496,20 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info
return 0;
}
-/*
- * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
- * page.
- */
static int
find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
- int socket_id;
- char *end, *nodestr;
- unsigned i, hp_count = 0;
- uint64_t virt_addr;
- char buf[BUFSIZ];
- char hugedir_str[PATH_MAX];
- FILE *f;
-
- f = fopen("/proc/self/numa_maps", "r");
- if (f == NULL) {
- RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps,"
- " consider that all memory is in socket_id 0\n");
- return 0;
- }
-
- snprintf(hugedir_str, sizeof(hugedir_str),
- "%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
-
- /* parse numa map */
- while (fgets(buf, sizeof(buf), f) != NULL) {
-
- /* ignore non huge page */
- if (strstr(buf, " huge ") == NULL &&
- strstr(buf, hugedir_str) == NULL)
- continue;
-
- /* get zone addr */
- virt_addr = strtoull(buf, &end, 16);
- if (virt_addr == 0 || end == buf) {
- RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
- goto error;
- }
-
- /* get node id (socket id) */
- nodestr = strstr(buf, " N");
- if (nodestr == NULL) {
- RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
- goto error;
- }
- nodestr += 2;
- end = strstr(nodestr, "=");
- if (end == NULL) {
- RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
- goto error;
- }
- end[0] = '\0';
- end = NULL;
-
- socket_id = strtoul(nodestr, &end, 0);
- if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
- RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
- goto error;
- }
-
- /* if we find this page in our mappings, set socket_id */
- for (i = 0; i < hpi->num_pages[0]; i++) {
- void *va = (void *)(unsigned long)virt_addr;
- if (hugepg_tbl[i].orig_va == va) {
- hugepg_tbl[i].socket_id = socket_id;
- hp_count++;
- }
+ unsigned int i;
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ if (get_mempolicy(&hugepg_tbl[i].socket_id,
+ NULL, 0, hugepg_tbl[i].orig_va,
+ MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to find NUMA socket for %p\n",
+ hugepg_tbl[i].orig_va);
+ return -1;
}
}
-
- if (hp_count < hpi->num_pages[0])
- goto error;
-
- fclose(f);
return 0;
-
-error:
- fclose(f);
- return -1;
}
static int
@@ -1051,8 +986,6 @@ rte_eal_hugepage_init(void)
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
goto fail;
}
diff --git a/mk/exec-env/linuxapp/rte.vars.mk b/mk/exec-env/linuxapp/rte.vars.mk
index 9a71699..0da977a 100644
--- a/mk/exec-env/linuxapp/rte.vars.mk
+++ b/mk/exec-env/linuxapp/rte.vars.mk
@@ -59,5 +59,6 @@ LINK_USING_CC := 1
EXECENV_LDFLAGS += -export-dynamic
# Add library to the group to resolve symbols
EXECENV_LDLIBS += -ldl
+EXECENV_LDLIBS += -lnuma
export EXECENV_CFLAGS EXECENV_LDFLAGS EXECENV_ASFLAGS EXECENV_LDLIBS
diff --git a/mk/internal/rte.build-pre.mk b/mk/internal/rte.build-pre.mk
index 560cf82..9bc0ffd 100644
--- a/mk/internal/rte.build-pre.mk
+++ b/mk/internal/rte.build-pre.mk
@@ -29,6 +29,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+LDLIBS += -lnuma
_BUILD_TARGETS := _prebuild _build _postbuild
comma := ,
--
2.9.3
next reply other threads:[~2017-06-18 15:52 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-18 15:52 Gregory Etelson [this message]
2017-06-21 15:49 ` Sergio Gonzalez Monroy
2017-07-01 14:18 ` Thomas Monjalon
2017-07-03 14:11 ` Sergio Gonzalez Monroy
2017-07-03 14:20 ` Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=25388622.XuEPg0MkIL@polaris \
--to=gregory@weka.io \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).