From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com,
	keith.wiles@intel.com, benjamin.walker@intel.com,
	bruce.richardson@intel.com, thomas@monjalon.net
Subject: [dpdk-dev] [PATCH 14/23] eal: add support for dynamic unmapping of pages
Date: Tue, 19 Dec 2017 11:04:39 +0000	[thread overview]
Message-ID: <bc77dcd45d3d4ca301fc4753bec25e5db93e94ec.1513680516.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1513680516.git.anatoly.burakov@intel.com>

This isn't used anywhere yet, but the support is now there. Also, add
cleanup to the allocation procedure, so that if we fail to allocate
everything we asked for, everything that was allocated is freed back.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 131 ++++++++++++++++++++++++++++-
 2 files changed, 133 insertions(+), 1 deletion(-)
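
A note for reviewers (not part of the patch itself): in single-file-segments
mode, freeing a page boils down to remapping the segment's virtual area as
anonymous memory and punching a hole in the backing hugetlbfs file with
fallocate(); once the file no longer occupies any blocks on disk, it is
unlinked. Below is a minimal standalone sketch of that idea; the file path,
segment address and size are hypothetical, and error handling is trimmed
compared to free_page() in the diff.

#define _GNU_SOURCE
#include <fcntl.h>      /* open(), fallocate(), FALLOC_FL_* */
#include <stdbool.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

bool
file_has_no_blocks(int fd)
{
	struct stat st;
	/* st_blocks counts 512-byte blocks actually allocated on disk */
	return fstat(fd, &st) == 0 && st.st_blocks == 0;
}

int
free_one_segment(void *seg_va, size_t seg_size,
		const char *file_path, off_t file_offset)
{
	/* keep the VA region reserved, but back it with anonymous memory;
	 * MAP_FIXED atomically replaces the old hugepage mapping */
	if (mmap(seg_va, seg_size, PROT_READ,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
				MAP_FAILED)
		return -1;

	int fd = open(file_path, O_RDWR);
	if (fd < 0)
		return -1;

	/* release the hugepage backing this range of the file */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			file_offset, seg_size) != 0)
		perror("fallocate");

	/* once the file occupies no blocks at all, remove it entirely */
	if (file_has_no_blocks(fd))
		unlink(file_path);

	close(fd);
	return 0;
}
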

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index 59fd330..47e4367 100755
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -44,4 +44,7 @@ int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 527c2f6..13172a0 100755
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -109,6 +109,18 @@ huge_recover_sigbus(void)
 	}
 }
 
+/*
+ * use fstat to check whether a file still occupies any blocks on disk
+ */
+static bool
+is_zero_length(int fd)
+{
+	struct stat st;
+	if (fstat(fd, &st) < 0)
+		return false;
+	return st.st_blocks == 0;
+}
+
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 static bool
 prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) {
@@ -267,6 +279,61 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return ret;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi, unsigned list_idx,
+		unsigned seg_idx) {
+	uint64_t fa_offset;
+	char path[PATH_MAX];
+	int fd;
+
+	fa_offset = seg_idx * ms->hugepage_sz;
+
+	if (internal_config.single_file_segments) {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir, list_idx);
+	} else {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir,
+				list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+	}
+
+	munmap(ms->addr, ms->hugepage_sz);
+
+	/* TODO: is there a race window between munmap() and the mmap() below? */
+
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	if (internal_config.single_file_segments) {
+		/* now, truncate or remove the original file */
+		fd = open(path, O_RDWR, 0600);
+		if (fd < 0) {
+			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
+					strerror(errno));
+			/* TODO: proper error handling */
+			return -1;
+		}
+
+		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				fa_offset, ms->hugepage_sz)) {
+			RTE_LOG(DEBUG, EAL, "Page deallocation failed: %s\n",
+				strerror(errno));
+		}
+		if (is_zero_length(fd)) {
+			unlink(path);
+		}
+		close(fd);
+	} else {
+		unlink(path);
+	}
+
+	memset(ms, 0, sizeof(*ms));
+
+	return 0;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 		uint64_t size, int socket, bool exact) {
@@ -274,7 +341,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned msl_idx;
-	int cur_idx, next_idx, end_idx, i, ret = 0;
+	int cur_idx, next_idx, start_idx, end_idx, i, j, ret = 0;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -366,6 +433,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	}
 
 	end_idx = cur_idx + n;
+	start_idx = cur_idx;
 
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	have_numa = prepare_numa(&oldpolicy, oldmask, socket);
@@ -387,6 +455,20 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 				ret = i;
 				goto restore_numa;
 			}
+			RTE_LOG(DEBUG, EAL, "exact amount of pages was requested, so freeing %i already allocated pages\n",
+				i);
+
+			/* clean up */
+			for (j = start_idx; j < cur_idx; j++) {
+				struct rte_memseg *tmp;
+				struct rte_fbarray *arr = &msl->memseg_arr;
+
+				tmp = rte_fbarray_get(arr, j);
+				if (free_page(tmp, hi, msl_idx, j))
+					rte_panic("Cannot free page\n");
+
+				rte_fbarray_set_used(arr, j, false);
+			}
 			if (ms)
 				memset(ms, 0, sizeof(struct rte_memseg*) * n);
 			ret = -1;
@@ -414,3 +496,50 @@ eal_memalloc_alloc_page(uint64_t size, int socket) {
 		return NULL;
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms) {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (int i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr +
+				cur->memseg_arr.capacity * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	rte_fbarray_set_used(&msl->memseg_arr, seg_idx, false);
+	return free_page(ms, hi, msl_idx, seg_idx);
+}
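
As a usage note (again, not part of the patch): together with the allocation
call added earlier in this series, the EAL now has a symmetric internal pair
of APIs. A hypothetical internal caller, assuming eal_memalloc_alloc_page()
and eal_memalloc_free_page() as declared in eal_memalloc.h above, might look
like this:

#include <stdint.h>
#include <rte_memory.h>		/* struct rte_memseg */
#include "eal_memalloc.h"

int
grow_and_shrink_once(uint64_t page_sz, int socket)
{
	struct rte_memseg *ms;

	/* allocate a single page of the requested size on the given socket */
	ms = eal_memalloc_alloc_page(page_sz, socket);
	if (ms == NULL)
		return -1;

	/* ... use ms->addr ... */

	/* hand the page back; this fails in legacy memory mode */
	return eal_memalloc_free_page(ms);
}
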
-- 
2.7.4

Thread overview: 44+ messages
2017-12-19 11:04 [dpdk-dev] [PATCH 00/23] Dynamic memory allocation for DPDK Anatoly Burakov
     [not found] ` <cover.1513594170.git.anatoly.burakov@intel.com>
2017-12-19 11:04   ` [dpdk-dev] [RFC 01/23] eal/memory: move get_virtual_area out of linuxapp eal_memory.c Anatoly Burakov
2017-12-19 11:15     ` Burakov, Anatoly
2017-12-19 11:04   ` [dpdk-dev] [RFC 02/23] eal/lcore: add function to report number of detected sockets Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 04/23] eal/malloc: move all locking to heap Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 05/23] eal/malloc: protect malloc heap stats with a lock Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 06/23] eal/malloc: make malloc a doubly-linked list Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 07/23] eal/malloc: make malloc_elem_join_adjacent_free public Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 10/23] eal: populate hugepage counts from socket-specific sysfs path Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 12/23] eal/memalloc: add support for dynamic memory allocation Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 13/23] eal/memory: make use of dynamic memory allocation for init Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 14/23] eal/memory: add support for dynamic unmapping of pages Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 15/23] eal/memalloc: add function to check if memory is physically contiguous Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 16/23] eal/malloc: enable dynamic memory allocation/free on malloc/free Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 17/23] eal/malloc: add backend support for contiguous memory allocation Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 18/23] eal/malloc: add rte_malloc support for allocating contiguous memory Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 19/23] eal/memzone: add support for reserving physically contiguous memzones Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 20/23] eal/memzone: make memzones use rte_fbarray Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 21/23] lib/mempool: add support for the new memory allocation methods Anatoly Burakov
2017-12-19 11:04   ` [dpdk-dev] [RFC 23/23] eal/memalloc: register/unregister memory with VFIO when alloc/free pages Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 01/23] eal: move get_virtual_area out of linuxapp eal_memory.c Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 02/23] eal: add function to report number of detected sockets Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 03/23] eal: add rte_fbarray Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 04/23] eal: move all locking to heap Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 05/23] eal: protect malloc heap stats with a lock Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 06/23] eal: make malloc a doubly-linked list Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 07/23] eal: make malloc_elem_join_adjacent_free public Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 08/23] eal: add "single file segments" command-line option Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 09/23] eal: add "legacy memory" option Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 10/23] eal: read hugepage counts from node-specific sysfs path Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 11/23] eal: replace memseg with memseg lists Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 12/23] eal: add support for dynamic memory allocation Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 13/23] eal: make use of dynamic memory allocation for init Anatoly Burakov
2017-12-19 11:04 ` Anatoly Burakov [this message]
2017-12-19 11:04 ` [dpdk-dev] [PATCH 15/23] eal: add API to check if memory is physically contiguous Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 16/23] eal: enable dynamic memory allocation/free on malloc/free Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 17/23] eal: add backend support for contiguous memory allocation Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 18/23] eal: add rte_malloc support for allocating contiguous memory Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 19/23] eal: enable reserving physically contiguous memzones Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 20/23] eal: make memzones use rte_fbarray Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 21/23] mempool: add support for the new memory allocation methods Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 22/23] vfio: allow to map other memory regions Anatoly Burakov
2017-12-19 11:04 ` [dpdk-dev] [PATCH 23/23] eal: map/unmap memory with VFIO when alloc/free pages Anatoly Burakov
2017-12-19 11:15 ` [dpdk-dev] [PATCH 00/23] Dynamic memory allocation for DPDK Burakov, Anatoly
