From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id 9FA531B022 for ; Tue, 19 Dec 2017 12:14:56 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 19 Dec 2017 03:14:55 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,426,1508828400"; d="scan'208";a="3709900" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga008.fm.intel.com with ESMTP; 19 Dec 2017 03:14:53 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id vBJBErYk003138; Tue, 19 Dec 2017 11:14:53 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id vBJBEqsb010302; Tue, 19 Dec 2017 11:14:52 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id vBJBEqF7010298; Tue, 19 Dec 2017 11:14:52 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, keith.wiles@intel.com, benjamin.walker@intel.com, bruce.richardson@intel.com, thomas@monjalon.net Date: Tue, 19 Dec 2017 11:14:44 +0000 Message-Id: <5affaa884964f8cd1025e991caf58c67783a52ab.1513681966.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 17/23] eal: add backend support for contiguous memory allocation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 19 Dec 2017 11:14:57 -0000 No major changes: add physical-contiguity checks in a few key places of the malloc backend, and pass a new "contig" parameter through the memzone, heap and element allocation paths. 
Signed-off-by: Anatoly Burakov --- lib/librte_eal/common/eal_common_memzone.c | 16 +++-- lib/librte_eal/common/malloc_elem.c | 105 +++++++++++++++++++++++------ lib/librte_eal/common/malloc_elem.h | 6 +- lib/librte_eal/common/malloc_heap.c | 54 +++++++++------ lib/librte_eal/common/malloc_heap.h | 6 +- lib/librte_eal/common/rte_malloc.c | 38 +++++++---- 6 files changed, 158 insertions(+), 67 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index c571145..542ae90 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -127,7 +127,8 @@ find_heap_max_free_elem(int *s, unsigned align) static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, - int socket_id, unsigned flags, unsigned align, unsigned bound) + int socket_id, unsigned flags, unsigned align, unsigned bound, + bool contig) { struct rte_memzone *mz; struct rte_mem_config *mcfg; @@ -217,7 +218,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, /* allocate memory on heap */ void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket, flags, - align, bound); + align, bound, contig); if (mz_addr == NULL) { rte_errno = ENOMEM; @@ -251,7 +252,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, static const struct rte_memzone * rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id, unsigned flags, unsigned align, - unsigned bound) + unsigned bound, bool contig) { struct rte_mem_config *mcfg; const struct rte_memzone *mz = NULL; @@ -262,7 +263,7 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len, rte_rwlock_write_lock(&mcfg->mlock); mz = memzone_reserve_aligned_thread_unsafe( - name, len, socket_id, flags, align, bound); + name, len, socket_id, flags, align, bound, contig); rte_rwlock_write_unlock(&mcfg->mlock); @@ -279,7 +280,7 @@ rte_memzone_reserve_bounded(const char *name, 
size_t len, int socket_id, unsigned flags, unsigned align, unsigned bound) { return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, - align, bound); + align, bound, false); } /* @@ -291,7 +292,7 @@ rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id, unsigned flags, unsigned align) { return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, - align, 0); + align, 0, false); } /* @@ -303,7 +304,8 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id, unsigned flags) { return rte_memzone_reserve_thread_safe(name, len, socket_id, - flags, RTE_CACHE_LINE_SIZE, 0); + flags, RTE_CACHE_LINE_SIZE, 0, + false); } int diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 48ac604..a7d7cef 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -45,6 +45,7 @@ #include #include +#include "eal_memalloc.h" #include "malloc_elem.h" #include "malloc_heap.h" @@ -122,32 +123,83 @@ malloc_elem_insert(struct malloc_elem *elem) } /* + * Attempt to find enough physically contiguous memory in this block to store + * our data. Assume that element has at least enough space to fit in the data, + * so we just check the page addresses. + */ +static bool +elem_check_phys_contig(const struct rte_memseg_list *msl, void *start, + size_t size) { + uint64_t page_sz; + void *aligned_start, *end, *aligned_end; + size_t aligned_len; + + /* figure out how many pages we need to fit in current data */ + page_sz = msl->hugepage_sz; + aligned_start = RTE_PTR_ALIGN_FLOOR(start, page_sz); + end = RTE_PTR_ADD(start, size); + aligned_end = RTE_PTR_ALIGN_CEIL(end, page_sz); + + aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); + + return eal_memalloc_is_contig(msl, aligned_start, aligned_len); +} + +/* * calculate the starting point of where data of the requested size * and alignment would fit in the current element. If the data doesn't * fit, return NULL. 
*/ static void * elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - const size_t bmask = ~(bound - 1); - uintptr_t end_pt = (uintptr_t)elem + - elem->size - MALLOC_ELEM_TRAILER_LEN; - uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - uintptr_t new_elem_start; - - /* check boundary */ - if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { - end_pt = RTE_ALIGN_FLOOR(end_pt, bound); - new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - if (((end_pt - 1) & bmask) != (new_data_start & bmask)) - return NULL; - } + size_t elem_size = elem->size; - new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + /* + * we're allocating from the end, so adjust the size of element by page + * size each time + */ + while (elem_size >= size) { + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + + elem_size - MALLOC_ELEM_TRAILER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + end_pt = new_data_start + size; + + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } - /* if the new start point is before the exist start, it won't fit */ - return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start; + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + + /* if the new start point is before the exist start, it won't fit */ + if (new_elem_start < (uintptr_t)elem) + return NULL; + + if (contig) { + size_t new_data_size = end_pt - new_data_start; + + /* + * if physical contiguousness was requested and we + * couldn't fit all data into one physically contiguous + * block, try again with lower addresses. 
+ */ + if (!elem_check_phys_contig(elem->msl, + (void*) new_data_start, new_data_size)) { + elem_size -= align; + continue; + } + } + return (void *) new_elem_start; + } + return NULL; } /* @@ -156,9 +208,9 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, */ int malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - return elem_start_pt(elem, size, align, bound) != NULL; + return elem_start_pt(elem, size, align, bound, contig) != NULL; } /* @@ -283,9 +335,10 @@ malloc_elem_free_list_remove(struct malloc_elem *elem) */ struct malloc_elem * malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound); + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound, + contig); const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; const size_t trailer_size = elem->size - old_elem_size - size - MALLOC_ELEM_OVERHEAD; @@ -508,9 +561,11 @@ malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len) { * immediately after it in memory. 
*/ int -malloc_elem_resize(struct malloc_elem *elem, size_t size) +malloc_elem_resize(struct malloc_elem *elem, size_t size, bool contig) { const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD; + const size_t new_data_size = new_size - MALLOC_ELEM_OVERHEAD; + void *data_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); /* if we request a smaller size, then always return ok */ if (elem->size >= new_size) @@ -523,6 +578,12 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size) if (elem->size + elem->next->size < new_size) return -1; + /* if physical contiguousness was requested, check that as well */ + if (contig && !elem_check_phys_contig(elem->msl, + data_ptr, new_data_size)) { + return -1; + } + /* we now know the element fits, so remove from free list, * join the two */ diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index b47c55e..02d6bd7 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -149,7 +149,7 @@ malloc_elem_insert(struct malloc_elem *elem); */ int malloc_elem_can_hold(struct malloc_elem *elem, size_t size, - unsigned align, size_t bound); + unsigned align, size_t bound, bool contig); /* * reserve a block of data in an existing malloc_elem. If the malloc_elem @@ -157,7 +157,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, */ struct malloc_elem * malloc_elem_alloc(struct malloc_elem *elem, size_t size, - unsigned align, size_t bound); + unsigned align, size_t bound, bool contig); /* * free a malloc_elem block by adding it to the free list. If the @@ -175,7 +175,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem); * immediately after it in memory. 
*/ int -malloc_elem_resize(struct malloc_elem *elem, size_t size); +malloc_elem_resize(struct malloc_elem *elem, size_t size, bool contig); void malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len); diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 0d61704..427f7c6 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -123,7 +123,7 @@ malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl, */ static struct malloc_elem * find_suitable_element(struct malloc_heap *heap, size_t size, - unsigned flags, size_t align, size_t bound) + unsigned flags, size_t align, size_t bound, bool contig) { size_t idx; struct malloc_elem *elem, *alt_elem = NULL; @@ -132,7 +132,8 @@ find_suitable_element(struct malloc_heap *heap, size_t size, idx < RTE_HEAP_NUM_FREELISTS; idx++) { for (elem = LIST_FIRST(&heap->free_head[idx]); !!elem; elem = LIST_NEXT(elem, free_list)) { - if (malloc_elem_can_hold(elem, size, align, bound)) { + if (malloc_elem_can_hold(elem, size, align, bound, + contig)) { if (check_hugepage_sz(flags, elem->msl->hugepage_sz)) return elem; if (alt_elem == NULL) @@ -155,16 +156,16 @@ find_suitable_element(struct malloc_heap *heap, size_t size, */ static void * heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, - unsigned flags, size_t align, size_t bound) + unsigned flags, size_t align, size_t bound, bool contig) { struct malloc_elem *elem; size = RTE_CACHE_LINE_ROUNDUP(size); align = RTE_CACHE_LINE_ROUNDUP(align); - elem = find_suitable_element(heap, size, flags, align, bound); + elem = find_suitable_element(heap, size, flags, align, bound, contig); if (elem != NULL) { - elem = malloc_elem_alloc(elem, size, align, bound); + elem = malloc_elem_alloc(elem, size, align, bound, contig); /* increase heap's count of allocated elements */ heap->alloc_count++; @@ -176,13 +177,13 @@ heap_alloc(struct malloc_heap *heap, const 
char *type __rte_unused, size_t size, static void * try_expand_heap(struct malloc_heap *heap, struct rte_memseg_list *msl, const char *type, size_t size, int socket, unsigned flags, - size_t align, size_t bound) { + size_t align, size_t bound, bool contig) { struct malloc_elem *elem; struct rte_memseg **ms; - size_t map_len; + size_t map_len, data_start_offset; void *map_addr; int i, n_pages, allocd_pages; - void *ret; + void *ret, *data_start; align = RTE_MAX(align, MALLOC_ELEM_HEADER_LEN); map_len = RTE_ALIGN_CEIL(align + size + MALLOC_ELEM_TRAILER_LEN, @@ -200,6 +201,16 @@ try_expand_heap(struct malloc_heap *heap, struct rte_memseg_list *msl, if (allocd_pages != n_pages) goto free_ms; + /* check if we wanted contiguous memory but didn't get it */ + data_start_offset = RTE_ALIGN(MALLOC_ELEM_HEADER_LEN, align); + data_start = RTE_PTR_ADD(ms[0]->addr, data_start_offset); + if (contig && !eal_memalloc_is_contig(msl, data_start, + n_pages * msl->hugepage_sz)) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n", + __func__); + goto free_pages; + } + map_addr = ms[0]->addr; /* add newly minted memsegs to malloc heap */ @@ -210,7 +221,7 @@ try_expand_heap(struct malloc_heap *heap, struct rte_memseg_list *msl, /* try once more, as now we have allocated new memory */ ret = heap_alloc(heap, type, size, flags, - align == 0 ? 1 : align, bound); + align == 0 ? 
1 : align, bound, contig); if (ret == NULL) goto free_elem; @@ -225,7 +236,7 @@ try_expand_heap(struct malloc_heap *heap, struct rte_memseg_list *msl, RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate, so shrinking heap on socket %d by %zdMB\n", __func__, socket, map_len >> 20ULL); - +free_pages: for (i = 0; i < n_pages; i++) { eal_memalloc_free_page(ms[i]); } @@ -249,7 +260,7 @@ compare_pagesz(const void *a, const void *b) { /* this will try lower page sizes first */ static void * heap_alloc_on_socket(const char *type, size_t size, int socket, - unsigned flags, size_t align, size_t bound) { + unsigned flags, size_t align, size_t bound, bool contig) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS]; @@ -264,7 +275,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, /* for legacy mode, try once and with all flags */ if (internal_config.legacy_mem) { ret = heap_alloc(heap, type, size, flags, - align == 0 ? 1 : align, bound); + align == 0 ? 1 : align, bound, contig); goto alloc_unlock; } @@ -274,7 +285,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, * we just need to request more memory first. */ ret = heap_alloc(heap, type, size, size_flags, align == 0 ? 1 : align, - bound); + bound, contig); if (ret != NULL) goto alloc_unlock; @@ -317,7 +328,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, * sizes first, before resorting to best effort allocation. */ ret = try_expand_heap(heap, msl, type, size, socket, size_flags, - align, bound); + align, bound, contig); if (ret != NULL) goto alloc_unlock; } @@ -326,7 +337,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, /* now, try reserving with size hint */ ret = heap_alloc(heap, type, size, flags, align == 0 ? 
1 : align, - bound); + bound, contig); if (ret != NULL) goto alloc_unlock; @@ -338,7 +349,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, struct rte_memseg_list *msl = other_msls[i]; ret = try_expand_heap(heap, msl, type, size, socket, flags, - align, bound); + align, bound, contig); if (ret != NULL) goto alloc_unlock; } @@ -349,7 +360,7 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, void * malloc_heap_alloc(const char *type, size_t size, int socket_arg, unsigned flags, - size_t align, size_t bound) { + size_t align, size_t bound, bool contig) { int socket, i; void *ret; @@ -371,7 +382,8 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, unsigned flags, // TODO: add warning for alignments bigger than page size if not VFIO - ret = heap_alloc_on_socket(type, size, socket, flags, align, bound); + ret = heap_alloc_on_socket(type, size, socket, flags, align, bound, + contig); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; @@ -380,7 +392,7 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, unsigned flags, if (i == socket) continue; ret = heap_alloc_on_socket(type, size, socket, flags, - align, bound); + align, bound, contig); if (ret != NULL) return ret; } @@ -455,7 +467,7 @@ malloc_heap_free(struct malloc_elem *elem) { } int -malloc_heap_resize(struct malloc_elem *elem, size_t size) { +malloc_heap_resize(struct malloc_elem *elem, size_t size, bool contig) { int ret; if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) @@ -463,7 +475,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size) { rte_spinlock_lock(&(elem->heap->lock)); - ret = malloc_elem_resize(elem, size); + ret = malloc_elem_resize(elem, size, contig); rte_spinlock_unlock(&(elem->heap->lock)); diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index 3fcd14f..e95b526 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -34,6 
+34,8 @@ #ifndef MALLOC_HEAP_H_ #define MALLOC_HEAP_H_ +#include + #include #include @@ -54,13 +56,13 @@ malloc_get_numa_socket(void) void * malloc_heap_alloc(const char *type, size_t size, int socket, unsigned flags, - size_t align, size_t bound); + size_t align, size_t bound, bool contig); int malloc_heap_free(struct malloc_elem *elem); int -malloc_heap_resize(struct malloc_elem *elem, size_t size); +malloc_heap_resize(struct malloc_elem *elem, size_t size, bool contig); int malloc_heap_get_stats(struct malloc_heap *heap, diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index dc3199a..623725e 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -62,12 +62,9 @@ void rte_free(void *addr) rte_panic("Fatal error: Invalid memory\n"); } -/* - * Allocate memory on specified heap. - */ -void * -rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) -{ +static void * +malloc_socket(const char *type, size_t size, unsigned align, int socket_arg, + bool contig) { int socket; /* return NULL if size is 0 or alignment is not power-of-2 */ @@ -86,8 +83,16 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) if (socket >= RTE_MAX_NUMA_NODES) return NULL; - return malloc_heap_alloc(type, size, socket_arg, 0, - align == 0 ? 1 : align, 0); + return malloc_heap_alloc(type, size, socket_arg, 0, align, 0, contig); +} + +/* + * Allocate memory on specified heap. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) +{ + return malloc_socket(type, size, align, socket_arg, false); } /* @@ -138,8 +143,8 @@ rte_calloc(const char *type, size_t num, size_t size, unsigned align) /* * Resize allocated memory. 
*/ -void * -rte_realloc(void *ptr, size_t size, unsigned align) +static void * +do_realloc(void *ptr, size_t size, unsigned align, bool contig) { if (ptr == NULL) return rte_malloc(NULL, size, align); @@ -151,12 +156,12 @@ rte_realloc(void *ptr, size_t size, unsigned align) size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align); /* check alignment matches first, and if ok, see if we can resize block */ if (RTE_PTR_ALIGN(ptr,align) == ptr && - malloc_heap_resize(elem, size) == 0) + malloc_heap_resize(elem, size, contig) == 0) return ptr; /* either alignment is off, or we have no room to expand, * so move data. */ - void *new_ptr = rte_malloc(NULL, size, align); + void *new_ptr = malloc_socket(NULL, size, align, SOCKET_ID_ANY, contig); if (new_ptr == NULL) return NULL; const unsigned old_size = elem->size - MALLOC_ELEM_OVERHEAD; @@ -166,6 +171,15 @@ rte_realloc(void *ptr, size_t size, unsigned align) return new_ptr; } +/* + * Resize allocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned align) +{ + return do_realloc(ptr, size, align, false); +} + int rte_malloc_validate(const void *ptr, size_t *size) { -- 2.7.4