From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com,
	laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,
	bruce.richardson@intel.com, thomas@monjalon.net,
	konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com,
	louise.m.daly@intel.com, nelio.laranjeiro@6wind.com, yskoh@mellanox.com,
	pepperjo@japf.ch, jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com,
	olivier.matz@6wind.com
Date: Sat, 3 Mar 2018 13:46:03 +0000
Message-Id: <421887e2d97e0bf14106cf23ad65e99cad670845.1520083504.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 1.7.0.7
Subject: [dpdk-dev] [PATCH 15/41] eal: add support for unmapping pages at runtime

This isn't used anywhere yet, but the support is now there. Also, add
cleanup to the allocation procedures, so that if we fail to allocate
everything we asked for, we can free it all back.
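
Since nothing calls the new API yet, a rough sketch of how a caller might
pair it with the single-page allocator already in eal_memalloc.c is shown
below (the wrapper name, the socket id of 0 and the error handling are
illustrative only and not part of this patch):

    #include <rte_memory.h>
    #include "eal_memalloc.h"

    /* hypothetical caller: allocate one page on socket 0, then free it */
    static int
    alloc_and_free_one_page(uint64_t page_sz)
    {
            struct rte_memseg *ms;

            /* single-page allocator introduced earlier in this series */
            ms = eal_memalloc_alloc_page(page_sz, 0);
            if (ms == NULL)
                    return -1;

            /* ... use ms->addr ... */

            /* new API added by this patch: unmaps the page and removes
             * the backing hugepage file if no other process uses it
             */
            if (eal_memalloc_free_page(ms) < 0)
                    return -1;
            return 0;
    }
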
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 148 ++++++++++++++++++++++++++++-
 2 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index c1076cf..adf59c4 100644
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -16,4 +16,7 @@ int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 1ba1201..bbeeeba 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -499,6 +499,64 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return -1;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi,
+		unsigned int list_idx, unsigned int seg_idx)
+{
+	uint64_t map_offset;
+	char path[PATH_MAX];
+	int fd, ret;
+
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	fd = get_page_fd(path, sizeof(path), hi, list_idx, seg_idx);
+	if (fd < 0)
+		return -1;
+
+	if (internal_config.single_file_segments) {
+		map_offset = seg_idx * ms->hugepage_sz;
+		if (resize_hugefile(fd, map_offset, ms->hugepage_sz, false))
+			return -1;
+		/* if file is zero-length, we've already shrunk it, so it's
+		 * safe to remove.
+		 */
+		if (is_zero_length(fd)) {
+			struct msl_entry *te = get_msl_entry_by_idx(list_idx);
+			if (te != NULL && te->fd >= 0) {
+				close(te->fd);
+				te->fd = -1;
+			}
+			unlink(path);
+		}
+		ret = 0;
+	} else {
+		/* if we're able to take out a write lock, we're the last one
+		 * holding onto this page.
+		 */
+
+		ret = lock(fd, 0, ms->hugepage_sz, F_WRLCK);
+		if (ret >= 0) {
+			/* no one else is using this page */
+			if (ret == 1)
+				unlink(path);
+			ret = lock(fd, 0, ms->hugepage_sz, F_UNLCK);
+			if (ret != 1)
+				RTE_LOG(ERR, EAL, "%s(): unable to unlock file %s\n",
+					__func__, path);
+		}
+		close(fd);
+	}
+
+	memset(ms, 0, sizeof(*ms));
+
+	return ret;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 		uint64_t size, int socket, bool exact)
@@ -507,7 +565,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned int msl_idx;
-	int cur_idx, end_idx, i, ret = -1;
+	int cur_idx, start_idx, end_idx, i, j, ret = -1;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -557,6 +615,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 			continue;
 
 		end_idx = cur_idx + n;
+		start_idx = cur_idx;
 
 		for (i = 0; cur_idx < end_idx; cur_idx++, i++) {
 			struct rte_memseg *cur;
@@ -567,25 +626,56 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 
 			if (alloc_page(cur, addr, size, socket, hi, msl_idx,
 					cur_idx)) {
+				RTE_LOG(DEBUG, EAL, "attempted to allocate %i pages, but only %i were allocated\n",
+					n, i);
 
-				/* if exact number wasn't requested, stop */
-				if (!exact)
+				/* if exact number of pages wasn't requested,
+				 * failing to allocate is not an error. we could
+				 * of course try other lists to see if there are
+				 * better fits, but a bird in the hand...
+				 */
+				if (!exact) {
 					ret = i;
-				goto restore_numa;
+					goto restore_numa;
+				}
+				RTE_LOG(DEBUG, EAL, "exact amount of pages was requested, so returning %i allocated pages\n",
+					i);
+
+				/* clean up */
+				for (j = start_idx; j < cur_idx; j++) {
+					struct rte_memseg *tmp;
+					struct rte_fbarray *arr =
+							&msl->memseg_arr;
+
+					tmp = rte_fbarray_get(arr, j);
+					if (free_page(tmp, hi, msl_idx,
+							start_idx + j))
+						rte_panic("Cannot free page\n");
+
+					rte_fbarray_set_free(arr, j);
+				}
+				/* clear the list */
+				if (ms)
+					memset(ms, 0, sizeof(*ms) * n);
+
+				/* try next list */
+				goto next_list;
 			}
 			if (ms)
 				ms[i] = cur;
 
 			rte_fbarray_set_used(&msl->memseg_arr, cur_idx);
 		}
+		/* we allocated all pages */
 		ret = n;
 		break;
+next_list:
+		/* dummy semi-colon to make label work */;
 	}
 	/* we didn't break */
-	if (!msl) {
+	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
 		RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
 			__func__);
 	}
@@ -607,3 +697,51 @@ eal_memalloc_alloc_page(uint64_t size, int socket)
 	/* return pointer to newly allocated memseg */
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned int msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+	int i;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr + cur->memseg_arr.len * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	rte_fbarray_set_free(&msl->memseg_arr, seg_idx);
+	return free_page(ms, hi, msl_idx, seg_idx);
+}
-- 
2.7.4
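
One note on the non-single-file-segments branch of free_page() above: the
"last user" test is ordinary POSIX advisory locking on the hugepage file,
i.e. if a non-blocking write lock can be taken, no other process still
holds the page and the file can be unlinked. The patch uses the
pre-existing lock() helper in eal_memalloc.c for this; the standalone
sketch below (hypothetical helper name, not part of the patch) shows the
same probe in plain fcntl() terms:

    #include <errno.h>
    #include <fcntl.h>

    /* Try to take a non-blocking write lock on [offset, offset + len).
     * Returns 1 if the lock was acquired (no other process holds a lock
     * on that range), 0 if someone else does, -1 on unexpected error.
     */
    static int
    probe_write_lock(int fd, off_t offset, off_t len)
    {
            struct flock fl = {
                    .l_type = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start = offset,
                    .l_len = len,
            };

            if (fcntl(fd, F_SETLK, &fl) == 0)
                    return 1;
            return (errno == EAGAIN || errno == EACCES) ? 0 : -1;
    }

If the probe succeeds, the caller may unlink() the backing file and then
release the lock with an F_UNLCK request, which is the sequence free_page()
follows in the patch.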