From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id C56181B7E1 for ; Mon, 9 Apr 2018 20:01:25 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 09 Apr 2018 11:01:25 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.48,427,1517904000"; d="scan'208";a="218993523" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga006.fm.intel.com with ESMTP; 09 Apr 2018 11:01:22 -0700 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w39I1L5a031036; Mon, 9 Apr 2018 19:01:21 +0100 Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w39I1Lhb027682; Mon, 9 Apr 2018 19:01:21 +0100 Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w39I1LYu027678; Mon, 9 Apr 2018 19:01:21 +0100 From: Anatoly Burakov To: dev@dpdk.org Cc: keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com, bruce.richardson@intel.com, thomas@monjalon.net, konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com, nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com, olivier.matz@6wind.com, shreyansh.jain@nxp.com, gowrishankar.m@linux.vnet.ibm.com Date: Mon, 9 Apr 2018 19:00:13 +0100 Message-Id: <6fd544b79740c4eae7eb111d103d08375fd9f96b.1523296700.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH v5 10/70] eal: add backend support for contiguous allocation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 09 Apr 2018 18:01:26 -0000 No major changes, just add some checks in a few key places, and a new parameter to pass around. Also, add a function to check malloc element for physical contiguousness. For now, assume hugepage memory is always contiguous, while non-hugepage memory will be checked. Signed-off-by: Anatoly Burakov Tested-by: Santosh Shukla Tested-by: Hemant Agrawal --- Notes: v3: - Moved this patch earlier - Added physical contiguousness checking function lib/librte_eal/common/eal_common_memzone.c | 23 +++--- lib/librte_eal/common/malloc_elem.c | 125 ++++++++++++++++++++++++----- lib/librte_eal/common/malloc_elem.h | 6 +- lib/librte_eal/common/malloc_heap.c | 11 +-- lib/librte_eal/common/malloc_heap.h | 4 +- lib/librte_eal/common/rte_malloc.c | 7 +- 6 files changed, 133 insertions(+), 43 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 1ab3ade..16a2e7a 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -98,7 +98,8 @@ find_heap_max_free_elem(int *s, unsigned align) static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, - int socket_id, unsigned flags, unsigned align, unsigned bound) + int socket_id, unsigned int flags, unsigned int align, + unsigned int bound, bool contig) { struct rte_memzone *mz; struct rte_mem_config *mcfg; @@ -188,7 +189,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, /* allocate memory on heap */ void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL, - requested_len, flags, align, bound); + requested_len, flags, align, bound, contig); if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) { /* try other heaps */ @@ -197,7 +198,8 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, continue; mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i], - NULL, requested_len, flags, align, bound); + NULL, requested_len, flags, align, + bound, contig); if (mz_addr != NULL) break; } @@ -235,9 +237,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, } static const struct rte_memzone * -rte_memzone_reserve_thread_safe(const char *name, size_t len, - int socket_id, unsigned flags, unsigned align, - unsigned bound) +rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id, + unsigned int flags, unsigned int align, unsigned int bound, + bool contig) { struct rte_mem_config *mcfg; const struct rte_memzone *mz = NULL; @@ -248,7 +250,7 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len, rte_rwlock_write_lock(&mcfg->mlock); mz = memzone_reserve_aligned_thread_unsafe( - name, len, socket_id, flags, align, bound); + name, len, socket_id, flags, align, bound, contig); rte_rwlock_write_unlock(&mcfg->mlock); @@ -265,7 +267,7 @@ rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id, unsigned flags, unsigned align, unsigned bound) { return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, - align, bound); + align, bound, false); } /* @@ -277,7 +279,7 @@ rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id, unsigned flags, unsigned align) { return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, - align, 0); + align, 0, false); } /* @@ -289,7 +291,8 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id, unsigned flags) { return rte_memzone_reserve_thread_safe(name, len, socket_id, - flags, RTE_CACHE_LINE_SIZE, 0); + flags, RTE_CACHE_LINE_SIZE, 0, + false); } int diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index c18f050..87695b9 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -94,33 +95,112 @@ malloc_elem_insert(struct malloc_elem *elem) } /* + * Attempt to find enough physically contiguous memory in this block to store + * our data. Assume that element has at least enough space to fit in the data, + * so we just check the page addresses. + */ +static bool +elem_check_phys_contig(const struct rte_memseg *ms __rte_unused, + void *start, size_t size) +{ + rte_iova_t cur, expected; + void *start_page, *end_page, *cur_page; + size_t pagesz; + + /* for hugepage memory or IOVA as VA, it's always contiguous */ + if (rte_eal_has_hugepages() || rte_eal_iova_mode() == RTE_IOVA_VA) + return true; + + /* otherwise, check if start and end are within the same page */ + pagesz = getpagesize(); + + start_page = RTE_PTR_ALIGN_FLOOR(start, pagesz); + end_page = RTE_PTR_ALIGN_FLOOR(RTE_PTR_ADD(start, size - 1), pagesz); + + if (start_page == end_page) + return true; + + /* if they are from different pages, check if they are contiguous */ + + /* if we can't access physical addresses, assume non-contiguous */ + if (!rte_eal_using_phys_addrs()) + return false; + + /* skip first iteration */ + cur = rte_mem_virt2iova(start_page); + expected = cur + pagesz; + cur_page = RTE_PTR_ADD(start_page, pagesz); + + while (cur_page <= end_page) { + cur = rte_mem_virt2iova(cur_page); + if (cur != expected) + return false; + cur_page = RTE_PTR_ADD(cur_page, pagesz); + expected += pagesz; + } + return true; +} + +/* * calculate the starting point of where data of the requested size * and alignment would fit in the current element. If the data doesn't * fit, return NULL. */ static void * elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - const size_t bmask = ~(bound - 1); - uintptr_t end_pt = (uintptr_t)elem + - elem->size - MALLOC_ELEM_TRAILER_LEN; - uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - uintptr_t new_elem_start; - - /* check boundary */ - if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { - end_pt = RTE_ALIGN_FLOOR(end_pt, bound); - new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - end_pt = new_data_start + size; - if (((end_pt - 1) & bmask) != (new_data_start & bmask)) - return NULL; - } + size_t elem_size = elem->size; + + /* + * we're allocating from the end, so adjust the size of element by + * alignment size. + */ + while (elem_size >= size) { + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + + elem_size - MALLOC_ELEM_TRAILER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), + align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), + align); + end_pt = new_data_start + size; + + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } + + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; - new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + /* if the new start point is before the exist start, + * it won't fit + */ + if (new_elem_start < (uintptr_t)elem) + return NULL; - /* if the new start point is before the exist start, it won't fit */ - return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start; + if (contig) { + size_t new_data_size = end_pt - new_data_start; + + /* + * if physical contiguousness was requested and we + * couldn't fit all data into one physically contiguous + * block, try again with lower addresses. + */ + if (!elem_check_phys_contig(elem->ms, + (void *)new_data_start, + new_data_size)) { + elem_size -= align; + continue; + } + } + return (void *)new_elem_start; + } + return NULL; } /* @@ -129,9 +209,9 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, */ int malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - return elem_start_pt(elem, size, align, bound) != NULL; + return elem_start_pt(elem, size, align, bound, contig) != NULL; } /* @@ -259,9 +339,10 @@ malloc_elem_free_list_remove(struct malloc_elem *elem) */ struct malloc_elem * malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, - size_t bound) + size_t bound, bool contig) { - struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound); + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound, + contig); const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; const size_t trailer_size = elem->size - old_elem_size - size - MALLOC_ELEM_OVERHEAD; diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index 9c1614c..34bd268 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -5,6 +5,8 @@ #ifndef MALLOC_ELEM_H_ #define MALLOC_ELEM_H_ +#include + #include /* dummy definition of struct so we can use pointers to it in malloc_elem struct */ @@ -123,7 +125,7 @@ malloc_elem_insert(struct malloc_elem *elem); */ int malloc_elem_can_hold(struct malloc_elem *elem, size_t size, - unsigned align, size_t bound); + unsigned int align, size_t bound, bool contig); /* * reserve a block of data in an existing malloc_elem. If the malloc_elem @@ -131,7 +133,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, */ struct malloc_elem * malloc_elem_alloc(struct malloc_elem *elem, size_t size, - unsigned align, size_t bound); + unsigned int align, size_t bound, bool contig); /* * free a malloc_elem block by adding it to the free list. If the diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index a2c2e4c..564b61a 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -88,7 +88,7 @@ malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) */ static struct malloc_elem * find_suitable_element(struct malloc_heap *heap, size_t size, - unsigned flags, size_t align, size_t bound) + unsigned int flags, size_t align, size_t bound, bool contig) { size_t idx; struct malloc_elem *elem, *alt_elem = NULL; @@ -97,7 +97,8 @@ find_suitable_element(struct malloc_heap *heap, size_t size, idx < RTE_HEAP_NUM_FREELISTS; idx++) { for (elem = LIST_FIRST(&heap->free_head[idx]); !!elem; elem = LIST_NEXT(elem, free_list)) { - if (malloc_elem_can_hold(elem, size, align, bound)) { + if (malloc_elem_can_hold(elem, size, align, bound, + contig)) { if (check_hugepage_sz(flags, elem->ms->hugepage_sz)) return elem; if (alt_elem == NULL) @@ -121,7 +122,7 @@ find_suitable_element(struct malloc_heap *heap, size_t size, void * malloc_heap_alloc(struct malloc_heap *heap, const char *type __attribute__((unused)), size_t size, unsigned flags, - size_t align, size_t bound) + size_t align, size_t bound, bool contig) { struct malloc_elem *elem; @@ -130,9 +131,9 @@ malloc_heap_alloc(struct malloc_heap *heap, rte_spinlock_lock(&heap->lock); - elem = find_suitable_element(heap, size, flags, align, bound); + elem = find_suitable_element(heap, size, flags, align, bound, contig); if (elem != NULL) { - elem = malloc_elem_alloc(elem, size, align, bound); + elem = malloc_elem_alloc(elem, size, align, bound, contig); /* increase heap's count of allocated elements */ heap->alloc_count++; } diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index bb28422..c57b59a 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -5,6 +5,8 @@ #ifndef MALLOC_HEAP_H_ #define MALLOC_HEAP_H_ +#include + #include #include @@ -25,7 +27,7 @@ malloc_get_numa_socket(void) void * malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size, - unsigned flags, size_t align, size_t bound); + unsigned int flags, size_t align, size_t bound, bool contig); int malloc_heap_free(struct malloc_elem *elem); diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 2cda48e..436818a 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -37,7 +37,8 @@ void rte_free(void *addr) * Allocate memory on specified heap. */ void * -rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) +rte_malloc_socket(const char *type, size_t size, unsigned int align, + int socket_arg) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; int socket, i; @@ -60,7 +61,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) return NULL; ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type, - size, 0, align == 0 ? 1 : align, 0); + size, 0, align == 0 ? 1 : align, 0, false); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; @@ -71,7 +72,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) continue; ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type, - size, 0, align == 0 ? 1 : align, 0); + size, 0, align == 0 ? 1 : align, 0, false); if (ret != NULL) return ret; } -- 2.7.4