DPDK patches and discussions
 help / color / mirror / Atom feed
From: Alejandro Lucero <alejandro.lucero@netronome.com>
To: dev@dpdk.org
Cc: stable@dpdk.org, anatoly.burakov@intel.com
Subject: [dpdk-dev] [PATCH v4 3/5] mem: use address hint for mapping hugepages
Date: Tue, 10 Jul 2018 18:25:50 +0100	[thread overview]
Message-ID: <1531243552-7795-4-git-send-email-alejandro.lucero@netronome.com> (raw)
In-Reply-To: <1531243552-7795-1-git-send-email-alejandro.lucero@netronome.com>

The Linux kernel uses a really high address as the starting address
for serving mmap calls. If a device has addressing limitations and
IOVA mode is VA, this starting address is likely too high for that
device. However, it is possible to use a lower address in the
process virtual address space, as with 64 bits there is a lot of
available space.

This patch adds an address hint as the starting address on 64-bit
systems.

Applicable to v17.11.3 only.

Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/librte_eal/linuxapp/eal/eal_memory.c | 55 ++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 17c20d4..2ed4017 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -88,6 +88,23 @@
 
 static uint64_t baseaddr_offset;
 
+#ifdef RTE_ARCH_64
+/*
+ * The Linux kernel uses a really high address as the starting address for
+ * serving mmap calls. If a device has addressing limitations and IOVA mode
+ * is VA, this starting address is likely too high for that device. However,
+ * it is possible to use a lower address in the process virtual address space
+ * as with 64 bits there is a lot of available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting address
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
+ * hugepages. This is likely enough for most systems, although a device with
+ * addressing limitations should call rte_dev_check_dma_mask to ensure all
+ * memory is within the supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
 static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -250,6 +267,23 @@
 	}
 }
 
+static void *
+get_addr_hint(void)
+{
+	if (internal_config.base_virtaddr != 0) {
+		return (void *) (uintptr_t)
+			    (internal_config.base_virtaddr +
+			     baseaddr_offset);
+	} else {
+#ifdef RTE_ARCH_64
+		return (void *) (uintptr_t) (baseaddr +
+				baseaddr_offset);
+#else
+		return NULL;
+#endif
+	}
+}
+
 /*
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
@@ -260,16 +294,10 @@
 static void *
 get_virtual_area(size_t *size, size_t hugepage_sz)
 {
-	void *addr;
+	void *addr, *addr_hint;
 	int fd;
 	long aligned_addr;
 
-	if (internal_config.base_virtaddr != 0) {
-		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
-				baseaddr_offset);
-	}
-	else addr = NULL;
-
 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
 
 	fd = open("/dev/zero", O_RDONLY);
@@ -278,7 +306,9 @@
 		return NULL;
 	}
 	do {
-		addr = mmap(addr,
+		addr_hint = get_addr_hint();
+
+		addr = mmap(addr_hint,
 				(*size) + hugepage_sz, PROT_READ,
 #ifdef RTE_ARCH_PPC_64
 				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
@@ -286,8 +316,15 @@
 				MAP_PRIVATE,
 #endif
 				fd, 0);
-		if (addr == MAP_FAILED)
+		if (addr == MAP_FAILED) {
+			/* map failed. Let's try with less memory */
 			*size -= hugepage_sz;
+		} else if (addr_hint && addr != addr_hint) {
+			/* hint was not used. Try with another offset */
+			munmap(addr, (*size) + hugepage_sz);
+			addr = MAP_FAILED;
+			baseaddr_offset += 0x100000000;
+		}
 	} while (addr == MAP_FAILED && *size > 0);
 
 	if (addr == MAP_FAILED) {
-- 
1.9.1

  parent reply	other threads:[~2018-07-10 17:26 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-10 17:25 [dpdk-dev] [PATCH v4 0/5] use IOVAs check based on DMA mask Alejandro Lucero
2018-07-10 17:25 ` [dpdk-dev] [PATCH v4 1/5] mem: add function for checking memsegs IOVAs addresses Alejandro Lucero
2018-07-11 10:12   ` [dpdk-dev] [dpdk-stable] " Eelco Chaudron
2018-07-10 17:25 ` [dpdk-dev] [PATCH v4 2/5] bus/pci: use IOVAs check when setting IOVA mode Alejandro Lucero
2018-07-11 10:18   ` [dpdk-dev] [dpdk-stable] " Eelco Chaudron
2018-07-11 10:41     ` Burakov, Anatoly
2018-07-10 17:25 ` Alejandro Lucero [this message]
2018-07-10 17:25 ` [dpdk-dev] [PATCH v4 4/5] net/nfp: check hugepages IOVAs based on DMA mask Alejandro Lucero
2018-07-10 17:25 ` [dpdk-dev] [PATCH v4 5/5] net/nfp: support IOVA VA mode Alejandro Lucero
2018-07-26 15:41 ` [dpdk-dev] [PATCH v4 0/5] use IOVAs check based on DMA mask Thomas Monjalon
2018-07-27  7:03   ` Alejandro Lucero
2018-07-27  8:01     ` Thomas Monjalon
2018-07-27  8:22       ` Alejandro Lucero
2018-07-27  8:52         ` Thomas Monjalon
2018-07-27  8:59           ` Alejandro Lucero
2018-07-27  8:54         ` Burakov, Anatoly

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1531243552-7795-4-git-send-email-alejandro.lucero@netronome.com \
    --to=alejandro.lucero@netronome.com \
    --cc=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    --cc=stable@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).