From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <aburakov@ecsmtp.ir.intel.com>
Received: from mga09.intel.com (mga09.intel.com [134.134.136.24])
 by dpdk.org (Postfix) with ESMTP id E401A1B8E3
 for <dev@dpdk.org>; Mon,  9 Apr 2018 20:02:03 +0200 (CEST)
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from orsmga001.jf.intel.com ([10.7.209.18])
 by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;
 09 Apr 2018 11:02:02 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.48,427,1517904000"; d="scan'208";a="46399145"
Received: from irvmail001.ir.intel.com ([163.33.26.43])
 by orsmga001.jf.intel.com with ESMTP; 09 Apr 2018 11:01:58 -0700
Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com
 [10.237.217.45])
 by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id
 w39I1vaV031350; Mon, 9 Apr 2018 19:01:57 +0100
Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1])
 by sivswdev01.ir.intel.com with ESMTP id w39I1vRV028404;
 Mon, 9 Apr 2018 19:01:57 +0100
Received: (from aburakov@localhost)
 by sivswdev01.ir.intel.com with LOCAL id w39I1vo7028390;
 Mon, 9 Apr 2018 19:01:57 +0100
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: Bruce Richardson <bruce.richardson@intel.com>, keith.wiles@intel.com,
 jianfeng.tan@intel.com, andras.kovacs@ericsson.com,
 laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,
 thomas@monjalon.net, konstantin.ananyev@intel.com,
 kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com,
 nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch,
 jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com,
 olivier.matz@6wind.com, shreyansh.jain@nxp.com,
 gowrishankar.m@linux.vnet.ibm.com
Date: Mon,  9 Apr 2018 19:00:55 +0100
Message-Id: <386c54ca7db76e8d5838da853b2c20a95e6bb2e7.1523296700.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 1.7.0.7
In-Reply-To: <cover.1523296700.git.anatoly.burakov@intel.com>
References: <cover.1523296700.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1523296700.git.anatoly.burakov@intel.com>
References: <cover.1523218215.git.anatoly.burakov@intel.com>
 <cover.1523296700.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [PATCH v5 52/70] eal: add support for unmapping pages at
	runtime
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Mon, 09 Apr 2018 18:02:04 -0000

This isn't used anywhere yet, but the support is now there. Also,
adding cleanup to allocation procedures, so that if we fail to
allocate everything we asked for, we can free all of it back.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <Santosh.Shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
---
 lib/librte_eal/bsdapp/eal/eal_memalloc.c   |  15 +++
 lib/librte_eal/common/eal_memalloc.h       |  14 +++
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 148 ++++++++++++++++++++++++++++-
 3 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c
index 8c30670..e7bcd2b 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c
@@ -24,3 +24,18 @@ eal_memalloc_alloc_seg(size_t __rte_unused page_sz, int __rte_unused socket)
 	RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
 	return NULL;
 }
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms __rte_unused)
+{
+	RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+	return -1;
+}
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms __rte_unused,
+		int n_segs __rte_unused)
+{
+	RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+	return -1;
+}
diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index f628514..6017345 100644
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -28,4 +28,18 @@ int
 eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
 		int socket, bool exact);
 
+/*
+ * Deallocate segment
+ */
+int
+eal_memalloc_free_seg(struct rte_memseg *ms);
+
+/*
+ * Deallocate `n_segs` segments. Returns 0 on successful deallocation of all
+ * segments, returns -1 on error. Any segments that could have been deallocated,
+ * will be deallocated even in case of error.
+ */
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 45ea0ad..11ef742 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -289,6 +289,48 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
 	return -1;
 }
 
+static int
+free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
+		unsigned int list_idx, unsigned int seg_idx)
+{
+	char path[PATH_MAX];
+	int fd, ret;
+
+	/* erase page data */
+	memset(ms->addr, 0, ms->len);
+
+	if (mmap(ms->addr, ms->len, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+	if (fd < 0)
+		return -1;
+
+	/* if we're able to take out a write lock, we're the last one
+	 * holding onto this page.
+	 */
+
+	ret = lock(fd, 0, ms->len, F_WRLCK);
+	if (ret >= 0) {
+		/* no one else is using this page */
+		if (ret == 1)
+			unlink(path);
+		ret = lock(fd, 0, ms->len, F_UNLCK);
+		if (ret != 1)
+			RTE_LOG(ERR, EAL, "%s(): unable to unlock file %s\n",
+				__func__, path);
+	}
+	close(fd);
+
+	memset(ms, 0, sizeof(*ms));
+
+	return ret;
+}
+
 struct alloc_walk_param {
 	struct hugepage_info *hi;
 	struct rte_memseg **ms;
@@ -305,7 +347,7 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
 	struct alloc_walk_param *wa = arg;
 	struct rte_memseg_list *cur_msl;
 	size_t page_sz;
-	int cur_idx;
+	int cur_idx, start_idx, j;
 	unsigned int msl_idx, need, i;
 
 	if (msl->page_sz != wa->page_sz)
@@ -324,6 +366,7 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
 	cur_idx = rte_fbarray_find_next_n_free(&cur_msl->memseg_arr, 0, need);
 	if (cur_idx < 0)
 		return 0;
+	start_idx = cur_idx;
 
 	for (i = 0; i < need; i++, cur_idx++) {
 		struct rte_memseg *cur;
@@ -341,6 +384,25 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
 			/* if exact number wasn't requested, stop */
 			if (!wa->exact)
 				goto out;
+
+			/* clean up */
+			for (j = start_idx; j < cur_idx; j++) {
+				struct rte_memseg *tmp;
+				struct rte_fbarray *arr =
+						&cur_msl->memseg_arr;
+
+				tmp = rte_fbarray_get(arr, j);
+				if (free_seg(tmp, wa->hi, msl_idx,
+						start_idx + j)) {
+					RTE_LOG(ERR, EAL, "Cannot free page\n");
+					continue;
+				}
+
+				rte_fbarray_set_free(arr, j);
+			}
+			/* clear the list */
+			if (wa->ms)
+				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);
 			return -1;
 		}
 		if (wa->ms)
@@ -351,7 +413,39 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
 out:
 	wa->segs_allocated = i;
 	return 1;
+}
+
+struct free_walk_param {
+	struct hugepage_info *hi;
+	struct rte_memseg *ms;
+};
+static int
+free_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *found_msl;
+	struct free_walk_param *wa = arg;
+	uintptr_t start_addr, end_addr;
+	int msl_idx, seg_idx;
 
+	start_addr = (uintptr_t) msl->base_va;
+	end_addr = start_addr + msl->memseg_arr.len * (size_t)msl->page_sz;
+
+	if ((uintptr_t)wa->ms->addr < start_addr ||
+			(uintptr_t)wa->ms->addr >= end_addr)
+		return 0;
+
+	msl_idx = msl - mcfg->memsegs;
+	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;
+
+	/* msl is const */
+	found_msl = &mcfg->memsegs[msl_idx];
+
+	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);
+	if (free_seg(wa->ms, wa->hi, msl_idx, seg_idx))
+		return -1;
+
+	return 1;
 }
 
 int
@@ -427,3 +521,55 @@ eal_memalloc_alloc_seg(size_t page_sz, int socket)
 	/* return pointer to newly allocated memseg */
 	return ms;
 }
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
+{
+	int seg, ret = 0;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (seg = 0; seg < n_segs; seg++) {
+		struct rte_memseg *cur = ms[seg];
+		struct hugepage_info *hi = NULL;
+		struct free_walk_param wa;
+		int i, walk_res;
+
+		memset(&wa, 0, sizeof(wa));
+
+		for (i = 0; i < (int)RTE_DIM(internal_config.hugepage_info);
+				i++) {
+			hi = &internal_config.hugepage_info[i];
+			if (cur->hugepage_sz == hi->hugepage_sz)
+				break;
+		}
+		if (i == (int)RTE_DIM(internal_config.hugepage_info)) {
+			RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+			ret = -1;
+			continue;
+		}
+
+		wa.ms = cur;
+		wa.hi = hi;
+
+		walk_res = rte_memseg_list_walk(free_seg_walk, &wa);
+		if (walk_res == 1)
+			continue;
+		if (walk_res == 0)
+			RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		ret = -1;
+	}
+	return ret;
+}
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms)
+{
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	return eal_memalloc_free_seg_bulk(&ms, 1);
+}
-- 
2.7.4