patches for DPDK stable branches
 help / color / mirror / Atom feed
From: David Wilder <dwilder@us.ibm.com>
To: stable@dpdk.org
Cc: pradeep@us.ibm.com, chaozhu@linux.vnet.ibm.com
Subject: [dpdk-stable] [PATCH] mem: Add Power9 support in rte_eal_hugepage_init
Date: Fri,  9 Nov 2018 11:28:43 -0800	[thread overview]
Message-ID: <20181109192843.2718-1-dwilder@us.ibm.com> (raw)

Determine if the ppc64 platform is Power9 or Power8 and perform huge
page mapping appropriately for the selected platform.

Signed-off-by: Pradeep Satyanarayana <pradeep@us.ibm.com>
Tested-by: David Wilder <wilder@us.ibm.com>
---
On IBM Power8, when mmapping hugepage files, the address hint supplied to mmap
is not always honored; therefore we let the kernel pick the address by
specifying a NULL address hint. On Power9 the address hint is honored as
expected. This patch detects the platform; on Power9 the address hint is
supplied to mmap and the pages are sorted appropriately. Hugepage mapping for
both primary and secondary processes now works correctly on Power9. I have
retained the original behavior and limitations on Power8. Additionally, the flags
supplied to mmap() have been corrected, eliminating the "Cannot get
a virtual area" messages previously seen during EAL init on Power.

 lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++-------
 1 file changed, 54 insertions(+), 21 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index bac969a12..5b7001be8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -50,6 +50,9 @@
 #include <limits.h>
 #include <sys/ioctl.h>
 #include <sys/time.h>
+#ifdef RTE_ARCH_PPC_64
+#include <sys/auxv.h>
+#endif
 #include <signal.h>
 #include <setjmp.h>
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
@@ -107,6 +110,10 @@ static uint64_t baseaddr = 0x100000000;
 
 static bool phys_addrs_available = true;
 
+#ifdef RTE_ARCH_PPC_64
+static int p8;
+#endif
+
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
 static void
@@ -309,12 +316,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
 		addr_hint = get_addr_hint();
 
 		addr = mmap(addr_hint,
-				(*size) + hugepage_sz, PROT_READ,
-#ifdef RTE_ARCH_PPC_64
-				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
-				MAP_PRIVATE,
-#endif
+				(*size) + hugepage_sz, PROT_READ, MAP_PRIVATE,
 				fd, 0);
 		if (addr == MAP_FAILED) {
 			/* map failed. Let's try with less memory */
@@ -501,6 +503,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
 			 * vma_len. If it fails, vma_addr is NULL, so
 			 * let the kernel provide the address. */
 			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+#ifdef RTE_ARCH_PPC_64
+			/*
+			 * On power8 the address hint is not consistently
+			 * honored, therefore we always let the
+			 * kernel provide the address.
+			 */
+			if (p8)
+				vma_addr = NULL;
+#endif
 			if (vma_addr == NULL)
 				vma_len = hugepage_sz;
 		}
@@ -1059,6 +1070,23 @@ rte_eal_hugepage_init(void)
 	int nr_hugefiles, nr_hugepages = 0;
 	void *addr;
 
+#ifdef RTE_ARCH_PPC_64
+	char *platform;
+	platform = (char *)getauxval(AT_BASE_PLATFORM);
+
+	p8 = 0;
+
+	/* Alert the user in case our assumptions are incorrect */
+	if (platform == NULL)
+		printf("Some distros on P9 do not support "
+			"getauxval(AT_BASE_PLATFORM). Assuming P9\n");
+
+	if (platform && !strncmp(platform, "power8", 6)) {
+		RTE_LOG(DEBUG, EAL, "This must be a P8\n");
+		p8 = 1;
+	} else
+		RTE_LOG(DEBUG, EAL, "This must be a P9\n");
+#endif
 	test_phys_addrs_available();
 
 	memset(used_hp, 0, sizeof(used_hp));
@@ -1305,14 +1333,22 @@ rte_eal_hugepage_init(void)
 			new_memseg = 1;
 
 #ifdef RTE_ARCH_PPC_64
-		/* On PPC64 architecture, the mmap always start from higher
-		 * virtual address to lower address. Here, both the physical
-		 * address and virtual address are in descending order */
+		/*
+		 * On power8 we let the kernel select the virtual address
+		 * for mmapped segments; successive mmaps will start from
+		 * higher virtual address to lower address. Physical addresses
+		 * are in descending order for both platforms.
+		 */
 		else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) !=
 		    hugepage[i].size)
 			new_memseg = 1;
-		else if (((unsigned long)hugepage[i-1].final_va -
-		    (unsigned long)hugepage[i].final_va) != hugepage[i].size)
+		else if ((((unsigned long)hugepage[i-1].final_va -
+		    (unsigned long)hugepage[i].final_va) !=
+		    hugepage[i].size) && (p8))
+			new_memseg = 1;
+		else if ((((unsigned long)hugepage[i].final_va -
+		    (unsigned long)hugepage[i-1].final_va) !=
+		    hugepage[i].size) && (!p8))
 			new_memseg = 1;
 #else
 		else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
@@ -1338,9 +1374,12 @@ rte_eal_hugepage_init(void)
 		else {
 #ifdef RTE_ARCH_PPC_64
 		/* Use the phy and virt address of the last page as segment
-		 * address for IBM Power architecture */
-			mcfg->memseg[j].iova = hugepage[i].physaddr;
-			mcfg->memseg[j].addr = hugepage[i].final_va;
+		 * address for IBM Power8 architecture.
+		 */
+			if (p8) {
+				mcfg->memseg[j].iova = hugepage[i].physaddr;
+				mcfg->memseg[j].addr = hugepage[i].final_va;
+			}
 #endif
 			mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
 		}
@@ -1437,13 +1476,7 @@ rte_eal_hugepage_attach(void)
 		 * use mmap to get identical addresses as the primary process.
 		 */
 		base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
-				 PROT_READ,
-#ifdef RTE_ARCH_PPC_64
-				 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
-				 MAP_PRIVATE,
-#endif
-				 fd_zero, 0);
+				 PROT_READ, MAP_PRIVATE, fd_zero, 0);
 		if (base_addr == MAP_FAILED ||
 		    base_addr != mcfg->memseg[s].addr) {
 			max_seg = s;
-- 
2.19.1

             reply	other threads:[~2018-11-09 19:29 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-09 19:28 David Wilder [this message]
2018-11-10  2:59 ` dwilder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181109192843.2718-1-dwilder@us.ibm.com \
    --to=dwilder@us.ibm.com \
    --cc=chaozhu@linux.vnet.ibm.com \
    --cc=pradeep@us.ibm.com \
    --cc=stable@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).