From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-yh0-f49.google.com (mail-yh0-f49.google.com [209.85.213.49]) by dpdk.org (Postfix) with ESMTP id 2C12B47D2 for ; Mon, 23 Jun 2014 23:17:21 +0200 (CEST) Received: by mail-yh0-f49.google.com with SMTP id f73so5533468yha.36 for ; Mon, 23 Jun 2014 14:17:39 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:subject:date:message-id:in-reply-to:references; bh=lVfDDkUVNBrTaWLOch7Yo/nqB0MT20WjqmvHpCb+1Go=; b=BrHHDYEnBIKYJLXTb1EDX3ykcYqwg7kDrKKtn149T0TeizQ9kq87/7yXK2xkoBPkSc LdrpFIFCsMszLoxpN8b9OQv7Lxlpvof0ArEbB3h018qWSJrb63AjuWbW5L0fQS7dYuBm dhfeIMOM3bgESXe8EeeJzPIqBgBqaavZR2Q1e1mPHPDljUGFNPgn7hp76fnK+1V24LbT 5g7S0YOUiYlAuZTZk5aPM24uWpQWfAcQSng7SqBZ5bLBx66HjRWJpSrNTDXVbDV+gz71 Iv8f5PkdE8pKVkDJz85E9z/lRNX853gn56lBeytbD0wkzdBeLnUNOxqC+vAPkyX5SmoM SUig== X-Received: by 10.236.229.133 with SMTP id h5mr38193311yhq.64.1403558259024; Mon, 23 Jun 2014 14:17:39 -0700 (PDT) Received: from localhost.localdomain ([69.80.69.242]) by mx.google.com with ESMTPSA id m25sm31531405yhc.12.2014.06.23.14.17.37 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 23 Jun 2014 14:17:38 -0700 (PDT) From: Robert Sanford To: dev@dpdk.org Date: Mon, 23 Jun 2014 17:17:09 -0400 Message-Id: <1403558230-40042-2-git-send-email-rsanford2@gmail.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1400245141-10938-1-git-send-email-rsanford2@gmail.com> References: <1400245141-10938-1-git-send-email-rsanford2@gmail.com> Subject: [dpdk-dev] [PATCH v3 1/2] malloc: fix malloc and free linear complexity X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 23 Jun 2014 21:17:22 -0000 Problems with lib rte_malloc: 1. Rte_malloc searches a heap's entire free list looking for the best fit, resulting in linear complexity. 2. Heaps store free blocks in a singly-linked list, resulting in linear complexity when rte_free needs to remove an adjacent block. 3. The library inserts and removes free blocks with ad hoc, in-line code, rather than using linked-list functions or macros. 4. The library wastes potential small blocks of size 64 and 128 bytes (plus overhead of 64 bytes) as padding when reusing free blocks or resizing allocated blocks. This patch addresses those problems as follows: 1. Replace single free list with a handful of free lists. Each free list contains blocks of a specified size range, for example: list[0]: (0 , 2^8] list[1]: (2^8 , 2^10] list[2]: (2^10, 2^12] list[3]: (2^12, 2^14] list[4]: (2^14, MAX_SIZE] When allocating a block, start at the first list that can contain a big enough block. Search subsequent lists, if necessary. Terminate the search as soon as we find a block that is big enough. 2. Use doubly-linked lists, so that we can remove free blocks in constant time. 3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3) man page. 4. Change code to utilize small blocks of data size 64 and 128, when splitting larger blocks. Signed-off-by: Robert Sanford --- lib/librte_eal/common/include/rte_malloc_heap.h | 6 +- lib/librte_malloc/malloc_elem.c | 127 +++++++++++++++-------- lib/librte_malloc/malloc_elem.h | 17 +++- lib/librte_malloc/malloc_heap.c | 67 +++++------- 4 files changed, 131 insertions(+), 86 deletions(-) diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h index fc4fd0a..f727b7a 100644 --- a/lib/librte_eal/common/include/rte_malloc_heap.h +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -35,14 +35,18 @@ #define _RTE_MALLOC_HEAP_H_ #include +#include #include +/* Number of free lists per heap, grouped by size. */ +#define RTE_HEAP_NUM_FREELISTS 5 + /** * Structure to hold malloc heap */ struct malloc_heap { rte_spinlock_t lock; - struct malloc_elem * volatile free_head; + LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; unsigned mz_count; unsigned alloc_count; size_t total_size; diff --git a/lib/librte_malloc/malloc_elem.c b/lib/librte_malloc/malloc_elem.c index 172da69..75a94d0 100644 --- a/lib/librte_malloc/malloc_elem.c +++ b/lib/librte_malloc/malloc_elem.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ #include "malloc_elem.h" #include "malloc_heap.h" -#define MIN_DATA_SIZE (CACHE_LINE_SIZE * 2) +#define MIN_DATA_SIZE (CACHE_LINE_SIZE) /* * initialise a general malloc_elem header structure @@ -60,7 +61,8 @@ malloc_elem_init(struct malloc_elem *elem, { elem->heap = heap; elem->mz = mz; - elem->prev = elem->next_free = NULL; + elem->prev = NULL; + memset(&elem->free_list, 0, sizeof(elem->free_list)); elem->state = ELEM_FREE; elem->size = size; elem->pad = 0; @@ -125,19 +127,76 @@ split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) } /* + * Given an element size, compute its freelist index. + * We free an element into the freelist containing similarly-sized elements. + * We try to allocate elements starting with the freelist containing + * similarly-sized elements, and if necessary, we search freelists + * containing larger elements. + * + * Example element size ranges for a heap with five free lists: + * heap->free_head[0] - (0 , 2^8] + * heap->free_head[1] - (2^8 , 2^10] + * heap->free_head[2] - (2^10 ,2^12] + * heap->free_head[3] - (2^12, 2^14] + * heap->free_head[4] - (2^14, MAX_SIZE] + */ +size_t +malloc_elem_free_list_index(size_t size) +{ +#define MALLOC_MINSIZE_LOG2 8 +#define MALLOC_LOG2_INCREMENT 2 + + size_t log2; + size_t index; + + if (size <= (1UL << MALLOC_MINSIZE_LOG2)) + return 0; + + /* Find next power of 2 >= size. */ + log2 = sizeof(size) * 8 - __builtin_clzl(size-1); + + /* Compute freelist index, based on log2(size). */ + index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) / + MALLOC_LOG2_INCREMENT; + + return (index <= RTE_HEAP_NUM_FREELISTS-1? + index: RTE_HEAP_NUM_FREELISTS-1); +} + +/* + * Add the specified element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem) +{ + size_t idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN); + + elem->state = ELEM_FREE; + LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list); +} + +/* + * Remove the specified element from its heap's free list. + */ +static void +elem_free_list_remove(struct malloc_elem *elem) +{ + LIST_REMOVE(elem, free_list); +} + +/* * reserve a block of data in an existing malloc_elem. If the malloc_elem * is much larger than the data block requested, we split the element in two. * This function is only called from malloc_heap_alloc so parameter checking * is not done here, as it's done there previously. */ struct malloc_elem * -malloc_elem_alloc(struct malloc_elem *elem, size_t size, - unsigned align, struct malloc_elem *prev_free) +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align) { struct malloc_elem *new_elem = elem_start_pt(elem, size, align); const unsigned old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; - if (old_elem_size <= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE){ + if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE){ /* don't split it, pad the element instead */ elem->state = ELEM_BUSY; elem->pad = old_elem_size; @@ -151,20 +210,20 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, set_header(new_elem); } /* remove element from free list */ - if (prev_free == NULL) - elem->heap->free_head = elem->next_free; - else - prev_free->next_free = elem->next_free; + elem_free_list_remove(elem); return new_elem; } /* we are going to split the element in two. The original element - * remains free, and the new element is the one allocated, so no free list - * changes need to be made. + * remains free, and the new element is the one allocated. + * Re-insert original element, in case its new size makes it + * belong on a different list. */ + elem_free_list_remove(elem); split_elem(elem, new_elem); new_elem->state = ELEM_BUSY; + malloc_elem_free_list_insert(elem); return new_elem; } @@ -182,25 +241,6 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) } /* - * scan the free list, and remove the request element from that - * free list. (Free list to scan is got from heap pointer in element) - */ -static inline void -remove_from_free_list(struct malloc_elem *elem) -{ - if (elem == elem->heap->free_head) - elem->heap->free_head = elem->next_free; - else{ - struct malloc_elem *prev_free = elem->heap->free_head; - while (prev_free && prev_free->next_free != elem) - prev_free = prev_free->next_free; - if (!prev_free) - rte_panic("Corrupted free list\n"); - prev_free->next_free = elem->next_free; - } -} - -/* * free a malloc_elem block by adding it to the free list. If the * blocks either immediately before or immediately after newly freed block * are also free, the blocks are merged together. @@ -214,21 +254,22 @@ malloc_elem_free(struct malloc_elem *elem) rte_spinlock_lock(&(elem->heap->lock)); struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); if (next->state == ELEM_FREE){ - /* join to this one, and remove from free list */ + /* remove from free list, join to this one */ + elem_free_list_remove(next); join_elem(elem, next); - remove_from_free_list(next); } /* check if previous element is free, if so join with it and return, - * no need to update free list, as that element is already there + * need to re-insert in free list, as that element's size is changing */ - if (elem->prev != NULL && elem->prev->state == ELEM_FREE) + if (elem->prev != NULL && elem->prev->state == ELEM_FREE) { + elem_free_list_remove(elem->prev); join_elem(elem->prev, elem); + malloc_elem_free_list_insert(elem->prev); + } /* otherwise add ourselves to the free list */ else { - elem->next_free = elem->heap->free_head; - elem->heap->free_head = elem; - elem->state = ELEM_FREE; + malloc_elem_free_list_insert(elem); elem->pad = 0; } /* decrease heap's count of allocated elements */ @@ -258,20 +299,18 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size) if (current_size + next->size < new_size) goto err_return; - /* we now know the element fits, so join the two, then remove from free - * list + /* we now know the element fits, so remove from free list, + * join the two */ + elem_free_list_remove(next); join_elem(elem, next); - remove_from_free_list(next); - if (elem->size - new_size > MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){ + if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){ /* now we have a big block together. Lets cut it down a bit, by splitting */ struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size); split_pt = RTE_PTR_ALIGN_CEIL(split_pt, CACHE_LINE_SIZE); split_elem(elem, split_pt); - split_pt->state = ELEM_FREE; - split_pt->next_free = elem->heap->free_head; - elem->heap->free_head = split_pt; + malloc_elem_free_list_insert(split_pt); } rte_spinlock_unlock(&elem->heap->lock); return 0; diff --git a/lib/librte_malloc/malloc_elem.h b/lib/librte_malloc/malloc_elem.h index cd25384..1d666a5 100644 --- a/lib/librte_malloc/malloc_elem.h +++ b/lib/librte_malloc/malloc_elem.h @@ -46,7 +46,7 @@ enum elem_state { struct malloc_elem { struct malloc_heap *heap; struct malloc_elem *volatile prev; /* points to prev elem in memzone */ - struct malloc_elem *volatile next_free; /* to make list of free elements */ + LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */ const struct rte_memzone *mz; volatile enum elem_state state; uint32_t pad; @@ -156,8 +156,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align); * is much larger than the data block requested, we split the element in two. */ struct malloc_elem * -malloc_elem_alloc(struct malloc_elem *elem, size_t size, - unsigned align, struct malloc_elem *prev_free); +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align); /* * free a malloc_elem block by adding it to the free list. If the @@ -174,4 +173,16 @@ malloc_elem_free(struct malloc_elem *elem); int malloc_elem_resize(struct malloc_elem *elem, size_t size); +/* + * Given an element size, compute its freelist index. + */ +size_t +malloc_elem_free_list_index(size_t size); + +/* + * Add element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem); + #endif /* MALLOC_ELEM_H_ */ diff --git a/lib/librte_malloc/malloc_heap.c b/lib/librte_malloc/malloc_heap.c index 6e99251..a36cd92 100644 --- a/lib/librte_malloc/malloc_heap.c +++ b/lib/librte_malloc/malloc_heap.c @@ -114,9 +114,8 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align) const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; malloc_elem_init(start_elem, heap, mz, elem_size); malloc_elem_mkend(end_elem, start_elem); + malloc_elem_free_list_insert(start_elem); - start_elem->next_free = heap->free_head; - heap->free_head = start_elem; /* increase heap total size by size of new memzone */ heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD; return 0; @@ -125,35 +124,25 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align) /* * Iterates through the freelist for a heap to find a free element * which can store data of the required size and with the requested alignment. - * Returns null on failure, or pointer to element on success, with the pointer - * to the previous element in the list, if any, being returned in a parameter - * (to make removing the element from the free list faster). + * Returns null on failure, or pointer to element on success. */ static struct malloc_elem * -find_suitable_element(struct malloc_heap *heap, size_t size, - unsigned align, struct malloc_elem **prev) +find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align) { - struct malloc_elem *elem, *min_elem, *min_prev; - size_t min_sz; - - elem = heap->free_head; - min_elem = NULL; - min_prev = NULL; - min_sz = (size_t) SIZE_MAX; - *prev = NULL; - - while(elem){ - if (malloc_elem_can_hold(elem, size, align)) { - if (min_sz > elem->size) { - min_elem = elem; - *prev = min_prev; - min_sz = elem->size; - } + size_t idx; + struct malloc_elem *elem; + + for (idx = malloc_elem_free_list_index(size); + idx < RTE_HEAP_NUM_FREELISTS; idx++) + { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) + { + if (malloc_elem_can_hold(elem, size, align)) + return elem; } - min_prev = elem; - elem = elem->next_free; } - return (min_elem); + return NULL; } /* @@ -169,15 +158,14 @@ malloc_heap_alloc(struct malloc_heap *heap, size = CACHE_LINE_ROUNDUP(size); align = CACHE_LINE_ROUNDUP(align); rte_spinlock_lock(&heap->lock); - struct malloc_elem *prev, *elem = find_suitable_element(heap, - size, align, &prev); + struct malloc_elem *elem = find_suitable_element(heap, size, align); if (elem == NULL){ if ((malloc_heap_add_memzone(heap, size, align)) == 0) - elem = find_suitable_element(heap, size, align, &prev); + elem = find_suitable_element(heap, size, align); } if (elem != NULL){ - elem = malloc_elem_alloc(elem, size, align, prev); + elem = malloc_elem_alloc(elem, size, align); /* increase heap's count of allocated elements */ heap->alloc_count++; } @@ -193,7 +181,8 @@ int malloc_heap_get_stats(const struct malloc_heap *heap, struct rte_malloc_socket_stats *socket_stats) { - struct malloc_elem *elem = heap->free_head; + size_t idx; + struct malloc_elem *elem; /* Initialise variables for heap */ socket_stats->free_count = 0; @@ -201,13 +190,15 @@ malloc_heap_get_stats(const struct malloc_heap *heap, socket_stats->greatest_free_size = 0; /* Iterate through free list */ - while(elem) { - socket_stats->free_count++; - socket_stats->heap_freesz_bytes += elem->size; - if (elem->size > socket_stats->greatest_free_size) - socket_stats->greatest_free_size = elem->size; - - elem = elem->next_free; + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) + { + socket_stats->free_count++; + socket_stats->heap_freesz_bytes += elem->size; + if (elem->size > socket_stats->greatest_free_size) + socket_stats->greatest_free_size = elem->size; + } } /* Get stats on overall heap and allocated memory on this heap */ socket_stats->heap_totalsz_bytes = heap->total_size; -- 1.7.1