From: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
To: dev@dpdk.org
Date: Sat, 6 Jun 2015 11:32:07 +0100
Message-Id: <1433586732-28217-3-git-send-email-sergio.gonzalez.monroy@intel.com>
X-Mailer: git-send-email 1.8.5.4
In-Reply-To: <1433586732-28217-1-git-send-email-sergio.gonzalez.monroy@intel.com>
References: <1431103079-18096-1-git-send-email-sergio.gonzalez.monroy@intel.com>
 <1433586732-28217-1-git-send-email-sergio.gonzalez.monroy@intel.com>
Subject: [dpdk-dev] [PATCH v2 2/7] eal: memzone allocated by malloc

In the current memory hierarchy, memsegs are groups of physically
contiguous hugepages, memzones are slices of memsegs, and malloc further
slices memzones into smaller memory chunks.

This patch modifies malloc so that it partitions memsegs instead of
memzones. Memzones now call malloc internally for memory allocation,
while the memzone ABI is kept unchanged. As a result it becomes possible
to free memzones, and therefore any other structure built on top of
memzones, e.g. mempools.

Signed-off-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
---
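A note to illustrate the new layering (not part of the patch): the toy
program below is a self-contained model in plain C, not DPDK code. The
bump "heap" and elem_hdr are simplified stand-ins for the per-socket
heap and struct malloc_elem; the one detail taken from the patch is the
header-before-data layout that malloc_elem_from_data() relies on.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

struct elem_hdr {                /* stand-in for struct malloc_elem */
	size_t size;             /* header + data */
};

static uint8_t memseg[4096];     /* stand-in for one rte_memseg */
static size_t heap_off;          /* trivial bump allocator over it */

/* carve a header-prefixed element out of the backing memseg */
static void *
heap_alloc(size_t size)
{
	struct elem_hdr *e = (struct elem_hdr *)&memseg[heap_off];

	e->size = sizeof(*e) + size;
	heap_off += e->size;
	return e + 1;            /* the caller only sees the data area */
}

/* mirrors malloc_elem_from_data(): step back to the element header */
static struct elem_hdr *
elem_from_data(void *data)
{
	return (struct elem_hdr *)data - 1;
}

int
main(void)
{
	/* reserving a "memzone" is now just a named heap allocation */
	void *mz_addr = heap_alloc(256);
	struct elem_hdr *e = elem_from_data(mz_addr);

	printf("data %p, header %p, elem size %zu\n",
			mz_addr, (void *)e, e->size);
	return 0;
}

This layout is what lets rte_malloc_virt2phy() below translate any
malloc'd address: the header in front of the data records which memseg
the block was carved from.
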
 lib/librte_eal/common/eal_common_memzone.c        | 273 ++++++----------------
 lib/librte_eal/common/include/rte_eal_memconfig.h |   2 +-
 lib/librte_eal/common/include/rte_malloc_heap.h   |   3 +-
 lib/librte_eal/common/include/rte_memory.h        |   1 +
 lib/librte_eal/common/malloc_elem.c               |  68 ++++--
 lib/librte_eal/common/malloc_elem.h               |  14 +-
 lib/librte_eal/common/malloc_heap.c               | 140 ++++++-----
 lib/librte_eal/common/malloc_heap.h               |   6 +-
 lib/librte_eal/common/rte_malloc.c                |   7 +-
 9 files changed, 197 insertions(+), 317 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 888f9e5..742f6c9 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -50,11 +50,10 @@
 #include
 #include

+#include "malloc_heap.h"
+#include "malloc_elem.h"
 #include "eal_private.h"

-/* internal copy of free memory segments */
-static struct rte_memseg *free_memseg = NULL;
-
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -68,8 +67,9 @@ memzone_lookup_thread_unsafe(const char *name)
	 * the algorithm is not optimal (linear), but there are few
	 * zones and this function should be called at init only
	 */
-	for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
-		if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
+	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+		if (mcfg->memzone[i].addr != NULL &&
+				!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
 			return &mcfg->memzone[i];
 	}

@@ -88,39 +88,45 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id,
			len, socket_id, flags, RTE_CACHE_LINE_SIZE);
 }

-/*
- * Helper function for memzone_reserve_aligned_thread_unsafe().
- * Calculate address offset from the start of the segment.
- * Align offset in that way that it satisfy istart alignmnet and
- * buffer of the requested length would not cross specified boundary.
- */
-static inline phys_addr_t
-align_phys_boundary(const struct rte_memseg *ms, size_t len, size_t align,
-	size_t bound)
+/* Find the heap with the greatest free block size */
+static void
+find_heap_max_free_elem(int *s, size_t *len, unsigned align)
 {
-	phys_addr_t addr_offset, bmask, end, start;
-	size_t step;
+	struct rte_mem_config *mcfg;
+	struct rte_malloc_socket_stats stats;
+	unsigned i;

-	step = RTE_MAX(align, bound);
-	bmask = ~((phys_addr_t)bound - 1);
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;

-	/* calculate offset to closest alignment */
-	start = RTE_ALIGN_CEIL(ms->phys_addr, align);
-	addr_offset = start - ms->phys_addr;
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
+		if (stats.greatest_free_size > *len) {
+			*len = stats.greatest_free_size;
+			*s = i;
+		}
+	}
+	*len -= (MALLOC_ELEM_OVERHEAD + align);
+}

-	while (addr_offset + len < ms->len) {
+/* Find a heap that can allocate the requested size */
+static void
+find_heap_suitable(int *s, size_t len, unsigned align)
+{
+	struct rte_mem_config *mcfg;
+	struct rte_malloc_socket_stats stats;
+	unsigned i;

-		/* check, do we meet boundary condition */
-		end = start + len - (len != 0);
-		if ((start & bmask) == (end & bmask))
-			break;
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;

-		/* calculate next offset */
-		start = RTE_ALIGN_CEIL(start + 1, step);
-		addr_offset = start - ms->phys_addr;
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
+		if (stats.greatest_free_size >= len + MALLOC_ELEM_OVERHEAD + align) {
+			*s = i;
+			break;
+		}
 	}
-
-	return (addr_offset);
 }

 static const struct rte_memzone *
@@ -128,13 +134,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
		int socket_id, unsigned flags, unsigned align, unsigned bound)
 {
	struct rte_mem_config *mcfg;
-	unsigned i = 0;
-	int memseg_idx = -1;
-	uint64_t addr_offset, seg_offset = 0;
	size_t requested_len;
-	size_t memseg_len = 0;
-	phys_addr_t memseg_physaddr;
-	void *memseg_addr;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;
@@ -166,7 +166,6 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
	if (align < RTE_CACHE_LINE_SIZE)
		align = RTE_CACHE_LINE_SIZE;

-
	/* align length on cache boundary. Check for overflow before doing so */
	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
		rte_errno = EINVAL; /* requested size too big */
@@ -180,129 +179,50 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);

	/* check that boundary condition is valid */
-	if (bound != 0 &&
-			(requested_len > bound || !rte_is_power_of_2(bound))) {
+	if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
		rte_errno = EINVAL;
		return NULL;
	}

-	/* find the smallest segment matching requirements */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		/* last segment */
-		if (free_memseg[i].addr == NULL)
-			break;
+	if (len == 0) {
+		if (bound != 0)
+			requested_len = bound;
+		else
+			requested_len = 0;
+	}

-		/* empty segment, skip it */
-		if (free_memseg[i].len == 0)
-			continue;
-
-		/* bad socket ID */
-		if (socket_id != SOCKET_ID_ANY &&
-		    free_memseg[i].socket_id != SOCKET_ID_ANY &&
-		    socket_id != free_memseg[i].socket_id)
-			continue;
-
-		/*
-		 * calculate offset to closest alignment that
-		 * meets boundary conditions.
-		 */
-		addr_offset = align_phys_boundary(free_memseg + i,
-			requested_len, align, bound);
-
-		/* check len */
-		if ((requested_len + addr_offset) > free_memseg[i].len)
-			continue;
-
-		/* check flags for hugepage sizes */
-		if ((flags & RTE_MEMZONE_2MB) &&
-		    free_memseg[i].hugepage_sz == RTE_PGSIZE_1G)
-			continue;
-		if ((flags & RTE_MEMZONE_1GB) &&
-		    free_memseg[i].hugepage_sz == RTE_PGSIZE_2M)
-			continue;
-		if ((flags & RTE_MEMZONE_16MB) &&
-		    free_memseg[i].hugepage_sz == RTE_PGSIZE_16G)
-			continue;
-		if ((flags & RTE_MEMZONE_16GB) &&
-		    free_memseg[i].hugepage_sz == RTE_PGSIZE_16M)
-			continue;
-
-		/* this segment is the best until now */
-		if (memseg_idx == -1) {
-			memseg_idx = i;
-			memseg_len = free_memseg[i].len;
-			seg_offset = addr_offset;
-		}
-		/* find the biggest contiguous zone */
-		else if (len == 0) {
-			if (free_memseg[i].len > memseg_len) {
-				memseg_idx = i;
-				memseg_len = free_memseg[i].len;
-				seg_offset = addr_offset;
-			}
-		}
-		/*
-		 * find the smallest (we already checked that current
-		 * zone length is > len
-		 */
-		else if (free_memseg[i].len + align < memseg_len ||
-			 (free_memseg[i].len <= memseg_len + align &&
-			  addr_offset < seg_offset)) {
-			memseg_idx = i;
-			memseg_len = free_memseg[i].len;
-			seg_offset = addr_offset;
+	if (socket_id == SOCKET_ID_ANY) {
+		if (requested_len == 0)
+			find_heap_max_free_elem(&socket_id, &requested_len, align);
+		else
+			find_heap_suitable(&socket_id, requested_len, align);
+
+		if (socket_id == SOCKET_ID_ANY) {
+			rte_errno = ENOMEM;
+			return NULL;
		}
	}

-	/* no segment found */
-	if (memseg_idx == -1) {
-		/*
-		 * If RTE_MEMZONE_SIZE_HINT_ONLY flag is specified,
-		 * try allocating again without the size parameter otherwise
-		 * fail.
-		 */
-		if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) &&
-		    ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)
-		    || (flags & RTE_MEMZONE_16MB) || (flags & RTE_MEMZONE_16GB)))
-			return memzone_reserve_aligned_thread_unsafe(name,
-				len, socket_id, 0, align, bound);
-
+	/* allocate memory on heap */
+	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket_id], NULL,
+			requested_len, flags, align, bound);
+	if (mz_addr == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

-	/* save aligned physical and virtual addresses */
-	memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
-	memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
-			(uintptr_t) seg_offset);
-
-	/* if we are looking for a biggest memzone */
-	if (len == 0) {
-		if (bound == 0)
-			requested_len = memseg_len - seg_offset;
-		else
-			requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
-				bound) - memseg_physaddr;
-	}
-
-	/* set length to correct value */
-	len = (size_t)seg_offset + requested_len;
-
-	/* update our internal state */
-	free_memseg[memseg_idx].len -= len;
-	free_memseg[memseg_idx].phys_addr += len;
-	free_memseg[memseg_idx].addr =
-		(char *)free_memseg[memseg_idx].addr + len;
+	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

	/* fill the zone in config */
	struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
	snprintf(mz->name, sizeof(mz->name), "%s", name);
-	mz->phys_addr = memseg_physaddr;
-	mz->addr = memseg_addr;
-	mz->len = requested_len;
-	mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
-	mz->socket_id = free_memseg[memseg_idx].socket_id;
+	mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+	mz->addr = mz_addr;
+	mz->len = (requested_len == 0 ? elem->size : requested_len);
+	mz->hugepage_sz = elem->ms->hugepage_sz;
+	mz->socket_id = elem->ms->socket_id;
	mz->flags = 0;
-	mz->memseg_id = memseg_idx;
+	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

	return mz;
 }
@@ -419,45 +339,6 @@ rte_memzone_dump(FILE *f)
 }

 /*
- * called by init: modify the free memseg list to have cache-aligned
- * addresses and cache-aligned lengths
- */
-static int
-memseg_sanitize(struct rte_memseg *memseg)
-{
-	unsigned phys_align;
-	unsigned virt_align;
-	unsigned off;
-
-	phys_align = memseg->phys_addr & RTE_CACHE_LINE_MASK;
-	virt_align = (unsigned long)memseg->addr & RTE_CACHE_LINE_MASK;
-
-	/*
-	 * sanity check: phys_addr and addr must have the same
-	 * alignment
-	 */
-	if (phys_align != virt_align)
-		return -1;
-
-	/* memseg is really too small, don't bother with it */
-	if (memseg->len < (2 * RTE_CACHE_LINE_SIZE)) {
-		memseg->len = 0;
-		return 0;
-	}
-
-	/* align start address */
-	off = (RTE_CACHE_LINE_SIZE - phys_align) & RTE_CACHE_LINE_MASK;
-	memseg->phys_addr += off;
-	memseg->addr = (char *)memseg->addr + off;
-	memseg->len -= off;
-
-	/* align end address */
-	memseg->len &= ~((uint64_t)RTE_CACHE_LINE_MASK);
-
-	return 0;
-}
-
-/*
  * Init the memzone subsystem
  */
 int
@@ -465,14 +346,10 @@ rte_eal_memzone_init(void)
 {
	struct rte_mem_config *mcfg;
	const struct rte_memseg *memseg;
-	unsigned i = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

-	/* mirror the runtime memsegs from config */
-	free_memseg = mcfg->free_memseg;
-
	/* secondary processes don't need to initialise anything */
	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
		return 0;
@@ -485,33 +362,13 @@ rte_eal_memzone_init(void)

	rte_rwlock_write_lock(&mcfg->mlock);

-	/* fill in uninitialized free_memsegs */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		if (memseg[i].addr == NULL)
-			break;
-		if (free_memseg[i].addr != NULL)
-			continue;
-		memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
-	}
-
-	/* make all zones cache-aligned */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		if (free_memseg[i].addr == NULL)
-			break;
-		if (memseg_sanitize(&free_memseg[i]) < 0) {
-			RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
-			rte_rwlock_write_unlock(&mcfg->mlock);
-			return -1;
-		}
-	}
-
	/* delete all zones */
	mcfg->memzone_idx = 0;
	memset(mcfg->memzone, 0, sizeof(mcfg->memzone));

	rte_rwlock_write_unlock(&mcfg->mlock);

-	return 0;
+	return rte_eal_malloc_heap_init();
 }

 /* Walk all reserved memory zones */
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 34f5abc..055212a 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -73,7 +73,7 @@ struct rte_mem_config {
	struct rte_memseg memseg[RTE_MAX_MEMSEG];    /**< Physmem descriptors. */
	struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */

-	/* Runtime Physmem descriptors. */
+	/* Runtime Physmem descriptors - NOT USED */
	struct rte_memseg free_memseg[RTE_MAX_MEMSEG];

	struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index 716216f..b270356 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -40,7 +40,7 @@
 #include

 /* Number of free lists per heap, grouped by size. */
-#define RTE_HEAP_NUM_FREELISTS  5
+#define RTE_HEAP_NUM_FREELISTS  13

 /**
  * Structure to hold malloc heap
@@ -48,7 +48,6 @@
 struct malloc_heap {
	rte_spinlock_t lock;
	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
-	unsigned mz_count;
	unsigned alloc_count;
	size_t total_size;
 } __rte_cache_aligned;
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 7f8103f..675b630 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -100,6 +100,7 @@ struct rte_memseg {
	/**< store segment MFNs */
	uint64_t mfn[DOM0_NUM_MEMBLOCK];
 #endif
+	uint8_t used;           /**< Used by a heap */
 } __attribute__((__packed__));

 /**
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index a5e1248..b54ee33 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -37,7 +37,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -56,10 +55,10 @@
  */
 void
 malloc_elem_init(struct malloc_elem *elem,
-		struct malloc_heap *heap, const struct rte_memzone *mz, size_t size)
+		struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
 {
	elem->heap = heap;
-	elem->mz = mz;
+	elem->ms = ms;
	elem->prev = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
@@ -70,12 +69,12 @@ malloc_elem_init(struct malloc_elem *elem,
 }

 /*
- * initialise a dummy malloc_elem header for the end-of-memzone marker
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
  */
 void
 malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
 {
-	malloc_elem_init(elem, prev->heap, prev->mz, 0);
+	malloc_elem_init(elem, prev->heap, prev->ms, 0);
	elem->prev = prev;
	elem->state = ELEM_BUSY; /* mark busy so its never merged */
 }
@@ -86,12 +85,24 @@ malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
  * fit, return NULL.
  */
 static void *
-elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align)
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
 {
-	const uintptr_t end_pt = (uintptr_t)elem +
+	const size_t bmask = ~(bound - 1);
+	uintptr_t end_pt = (uintptr_t)elem +
			elem->size - MALLOC_ELEM_TRAILER_LEN;
-	const uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
-	const uintptr_t new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+	uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+	uintptr_t new_elem_start;
+
+	/* check boundary */
+	if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+		end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+		new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+		if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+			return NULL;
+	}
+
+	new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

	/* if the new start point is before the exist start, it won't fit */
	return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
@@ -102,9 +113,10 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align)
  * alignment request from the current element
  */
 int
-malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align)
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
 {
-	return elem_start_pt(elem, size, align) != NULL;
+	return elem_start_pt(elem, size, align, bound) != NULL;
 }

 /*
@@ -115,10 +127,10 @@ static void
 split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
 {
	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
-	const unsigned old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
-	const unsigned new_elem_size = elem->size - old_elem_size;
+	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
+	const size_t new_elem_size = elem->size - old_elem_size;

-	malloc_elem_init(split_pt, elem->heap, elem->mz, new_elem_size);
+	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
	split_pt->prev = elem;
	next_elem->prev = split_pt;
	elem->size = old_elem_size;
@@ -168,8 +180,9 @@ malloc_elem_free_list_index(size_t size)
 void
 malloc_elem_free_list_insert(struct malloc_elem *elem)
 {
-	size_t idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+	size_t idx;

+	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
 }
@@ -190,12 +203,26 @@ elem_free_list_remove(struct malloc_elem *elem)
  * is not done here, as it's done there previously.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
 {
-	struct malloc_elem *new_elem = elem_start_pt(elem, size, align);
-	const unsigned old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+	const size_t trailer_size = elem->size - old_elem_size - size -
+		MALLOC_ELEM_OVERHEAD;
+
+	elem_free_list_remove(elem);

-	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE){
+	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+		/* split it, too much free space after elem */
+		struct malloc_elem *new_free_elem =
+				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+		split_elem(elem, new_free_elem);
+		malloc_elem_free_list_insert(new_free_elem);
+	}
+
+	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;
@@ -208,8 +235,6 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}
-		/* remove element from free list */
-		elem_free_list_remove(elem);

		return new_elem;
	}
@@ -219,7 +244,6 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
-	elem_free_list_remove(elem);
	split_elem(elem, new_elem);
	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 9790b1a..e05d2ea 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -47,9 +47,9 @@ enum elem_state {

 struct malloc_elem {
	struct malloc_heap *heap;
-	struct malloc_elem *volatile prev;      /* points to prev elem in memzone */
+	struct malloc_elem *volatile prev;      /* points to prev elem in memseg */
	LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
-	const struct rte_memzone *mz;
+	const struct rte_memseg *ms;
	volatile enum elem_state state;
	uint32_t pad;
	size_t size;
@@ -136,11 +136,11 @@ malloc_elem_from_data(const void *data)
 void
 malloc_elem_init(struct malloc_elem *elem,
		struct malloc_heap *heap,
-		const struct rte_memzone *mz,
+		const struct rte_memseg *ms,
		size_t size);

 /*
- * initialise a dummy malloc_elem header for the end-of-memzone marker
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
  */
 void
 malloc_elem_mkend(struct malloc_elem *elem,
@@ -151,14 +151,16 @@ malloc_elem_mkend(struct malloc_elem *elem,
  * of the requested size and with the requested alignment
  */
 int
-malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align);
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
+		unsigned align, size_t bound);

 /*
  * reserve a block of data in an existing malloc_elem. If the malloc_elem
  * is much larger than the data block requested, we split the element in two.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align);
+malloc_elem_alloc(struct malloc_elem *elem, size_t size,
+		unsigned align, size_t bound);

 /*
  * free a malloc_elem block by adding it to the free list. If the
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index defb903..4a423c1 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -39,7 +39,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -54,123 +53,105 @@
 #include "malloc_elem.h"
 #include "malloc_heap.h"

-/* since the memzone size starts with a digit, it will appear unquoted in
- * rte_config.h, so quote it so it can be passed to rte_str_to_size */
-#define MALLOC_MEMZONE_SIZE RTE_STR(RTE_MALLOC_MEMZONE_SIZE)
-
-/*
- * returns the configuration setting for the memzone size as a size_t value
- */
-static inline size_t
-get_malloc_memzone_size(void)
+static unsigned
+check_hugepage_sz(unsigned flags, size_t hugepage_sz)
 {
-	return rte_str_to_size(MALLOC_MEMZONE_SIZE);
+	unsigned ret = 1;
+
+	if ((flags & RTE_MEMZONE_2MB) && hugepage_sz == RTE_PGSIZE_1G)
+		ret = 0;
+	if ((flags & RTE_MEMZONE_1GB) && hugepage_sz == RTE_PGSIZE_2M)
+		ret = 0;
+	if ((flags & RTE_MEMZONE_16MB) && hugepage_sz == RTE_PGSIZE_16G)
+		ret = 0;
+	if ((flags & RTE_MEMZONE_16GB) && hugepage_sz == RTE_PGSIZE_16M)
+		ret = 0;
+
+	return ret;
 }

 /*
- * reserve an extra memory zone and make it available for use by a particular
- * heap. This reserves the zone and sets a dummy malloc_elem header at the end
+ * Expand the heap with a memseg.
+ * This reserves the zone and sets a dummy malloc_elem header at the end
  * to prevent overflow. The rest of the zone is added to free list as a single
  * large free block
  */
-static int
-malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
+static void
+malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
 {
-	const unsigned mz_flags = 0;
-	const size_t block_size = get_malloc_memzone_size();
-	/* ensure the data we want to allocate will fit in the memzone */
-	const size_t min_size = size + align + MALLOC_ELEM_OVERHEAD * 2;
-	const struct rte_memzone *mz = NULL;
-	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	unsigned numa_socket = heap - mcfg->malloc_heaps;
-
-	size_t mz_size = min_size;
-	if (mz_size < block_size)
-		mz_size = block_size;
-
-	char mz_name[RTE_MEMZONE_NAMESIZE];
-	snprintf(mz_name, sizeof(mz_name), "MALLOC_S%u_HEAP_%u",
-		     numa_socket, heap->mz_count++);
-
-	/* try getting a block. if we fail and we don't need as big a block
-	 * as given in the config, we can shrink our request and try again
-	 */
-	do {
-		mz = rte_memzone_reserve(mz_name, mz_size, numa_socket,
-					 mz_flags);
-		if (mz == NULL)
-			mz_size /= 2;
-	} while (mz == NULL && mz_size > min_size);
-	if (mz == NULL)
-		return -1;
-
	/* allocate the memory block headers, one at end, one at start */
-	struct malloc_elem *start_elem = (struct malloc_elem *)mz->addr;
-	struct malloc_elem *end_elem = RTE_PTR_ADD(mz->addr,
-			mz_size - MALLOC_ELEM_OVERHEAD);
+	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
+	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
+			ms->len - MALLOC_ELEM_OVERHEAD);
	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);

-	const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
-	malloc_elem_init(start_elem, heap, mz, elem_size);
+	const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
+	malloc_elem_init(start_elem, heap, ms, elem_size);
	malloc_elem_mkend(end_elem, start_elem);
	malloc_elem_free_list_insert(start_elem);

-	/* increase heap total size by size of new memzone */
-	heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD;
-	return 0;
+	heap->total_size += elem_size;
 }

 /*
  * Iterates through the freelist for a heap to find a free element
  * which can store data of the required size and with the requested alignment.
+ * If size is 0, find the biggest available elem.
  * Returns null on failure, or pointer to element on success.
  */
 static struct malloc_elem *
-find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align)
+find_suitable_element(struct malloc_heap *heap, size_t size,
+		unsigned flags, size_t align, size_t bound)
 {
	size_t idx;
-	struct malloc_elem *elem;
+	struct malloc_elem *elem, *alt_elem = NULL;

	for (idx = malloc_elem_free_list_index(size);
-		idx < RTE_HEAP_NUM_FREELISTS; idx++)
-	{
+			idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
-			!!elem; elem = LIST_NEXT(elem, free_list))
-		{
-			if (malloc_elem_can_hold(elem, size, align))
-				return elem;
+				!!elem; elem = LIST_NEXT(elem, free_list)) {
+			if (malloc_elem_can_hold(elem, size, align, bound)) {
+				if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
+					return elem;
+				else
+					alt_elem = elem;
+			}
		}
	}
+
+	if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+		return alt_elem;
+
	return NULL;
 }

 /*
- * Main function called by malloc to allocate a block of memory from the
- * heap. It locks the free list, scans it, and adds a new memzone if the
- * scan fails. Once the new memzone is added, it re-scans and should return
+ * Main function to allocate a block of memory from the heap.
+ * It locks the free list, scans it, and adds a new memseg if the
+ * scan fails. Once the new memseg is added, it re-scans and should return
  * the new element after releasing the lock.
 */
 void *
 malloc_heap_alloc(struct malloc_heap *heap,
-		const char *type __attribute__((unused)), size_t size, unsigned align)
+		const char *type __attribute__((unused)), size_t size, unsigned flags,
+		size_t align, size_t bound)
 {
+	struct malloc_elem *elem;
+
	size = RTE_CACHE_LINE_ROUNDUP(size);
	align = RTE_CACHE_LINE_ROUNDUP(align);
+
	rte_spinlock_lock(&heap->lock);
-	struct malloc_elem *elem = find_suitable_element(heap, size, align);
-	if (elem == NULL){
-		if ((malloc_heap_add_memzone(heap, size, align)) == 0)
-			elem = find_suitable_element(heap, size, align);
-	}
-	if (elem != NULL){
-		elem = malloc_elem_alloc(elem, size, align);
+
+	elem = find_suitable_element(heap, size, flags, align, bound);
+	if (elem != NULL) {
+		elem = malloc_elem_alloc(elem, size, align, bound);
		/* increase heap's count of allocated elements */
		heap->alloc_count++;
	}
	rte_spinlock_unlock(&heap->lock);

-	return elem == NULL ? NULL : (void *)(&elem[1]);
+	return elem == NULL ? NULL : (void *)(&elem[1]);
 }

 /*
@@ -207,3 +188,20 @@ malloc_heap_get_stats(const struct malloc_heap *heap,

	return 0;
 }
+
+int
+rte_eal_malloc_heap_init(void)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	unsigned ms_cnt;
+	struct rte_memseg *ms;
+
+	if (mcfg == NULL)
+		return -1;
+
+	for (ms = &mcfg->memseg[0], ms_cnt = 0;
+			(ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
+			ms_cnt++, ms++)
+		malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
+
+	return 0;
+}
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index a47136d..3ccbef0 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -53,15 +53,15 @@ malloc_get_numa_socket(void)
 }

 void *
-malloc_heap_alloc(struct malloc_heap *heap, const char *type,
-		size_t size, unsigned align);
+malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size,
+		unsigned flags, size_t align, size_t bound);

 int
 malloc_heap_get_stats(const struct malloc_heap *heap,
		struct rte_malloc_socket_stats *socket_stats);

 int
-rte_eal_heap_memzone_init(void);
+rte_eal_malloc_heap_init(void);

 #ifdef __cplusplus
 }
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index c313a57..54c2bd8 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -39,7 +39,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -87,7 +86,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
		return NULL;

	ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
-				size, align == 0 ? 1 : align);
+				size, 0, align == 0 ? 1 : align, 0);
	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
		return ret;

@@ -98,7 +97,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
			continue;

		ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
-					size, align == 0 ? 1 : align);
+					size, 0, align == 0 ? 1 : align, 0);
		if (ret != NULL)
			return ret;
	}
@@ -256,5 +255,5 @@ rte_malloc_virt2phy(const void *addr)
	const struct malloc_elem *elem = malloc_elem_from_data(addr);
	if (elem == NULL)
		return 0;
-	return elem->mz->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->mz->addr);
+	return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
 }
-- 
1.9.3
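
A standalone demo of the boundary arithmetic that the new elem_start_pt()
uses (not part of the patch; plain C with made-up addresses, and
ALIGN_FLOOR is a local re-derivation of RTE_ALIGN_FLOOR): place a block
of `size` bytes at the end of a free region, aligned to `align`, without
letting it straddle a multiple of `bound`.

#include <stdint.h>
#include <stdio.h>

/* local equivalent of RTE_ALIGN_FLOOR for power-of-two alignments */
#define ALIGN_FLOOR(v, a) ((uintptr_t)(v) & ~((uintptr_t)(a) - 1))

/* same checks as elem_start_pt(), minus the element header math;
 * bound must be a nonzero power of two here */
static uintptr_t
place(uintptr_t end_pt, size_t size, size_t align, size_t bound)
{
	uintptr_t bmask = ~((uintptr_t)bound - 1);
	uintptr_t start = ALIGN_FLOOR(end_pt - size, align);

	if ((start & bmask) != ((end_pt - 1) & bmask)) {
		/* the block straddles a boundary: retry below it */
		end_pt = ALIGN_FLOOR(end_pt, bound);
		start = ALIGN_FLOOR(end_pt - size, align);
		if (((end_pt - 1) & bmask) != (start & bmask))
			return 0;	/* cannot satisfy the boundary */
	}
	return start;
}

int
main(void)
{
	/* 100 bytes, 64-byte aligned, not crossing a 4 KiB boundary;
	 * the first end point straddles a 4 KiB line and forces the
	 * retry, the second fits directly */
	printf("%#lx\n", (unsigned long)place(0x10001040, 100, 64, 0x1000));
	printf("%#lx\n", (unsigned long)place(0x10000f80, 100, 64, 0x1000));
	return 0;
}

The degenerate case is worth noting: with bound == 0, bound - 1 wraps to
the maximum value, bmask becomes 0 and the comparison always holds, so no
boundary is enforced. That is how callers such as rte_malloc_socket()
pass 0 to mean "no boundary".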