DPDK patches and discussions
 help / color / mirror / Atom feed
From: rsanford2@gmail.com
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2] malloc: fix malloc and free linear complexity
Date: Fri, 16 May 2014 08:59:01 -0400	[thread overview]
Message-ID: <1400245141-10938-1-git-send-email-rsanford2@gmail.com> (raw)

Problems with lib rte_malloc:
 1. Rte_malloc searches a heap's entire free list looking for
    the best fit, resulting in linear complexity.
 2. Heaps store free blocks in a singly-linked list, resulting
    in linear complexity when rte_free needs to remove an
    adjacent block.
 3. The library inserts and removes free blocks with ad hoc,
    in-line code, rather than using linked-list functions or
    macros.

This patch addresses those problems as follows:
 1. Replace single free list with a handful of free lists.
    Each free list contains blocks of a specified size range,
    for example:
      list[0]: (0   , 2^7]
      list[1]: (2^7 , 2^9]
      list[2]: (2^9 , 2^11]
      list[3]: (2^11, 2^13]
      list[4]: (2^13, MAX_SIZE]

    When allocating a block, start at the first list that can
    contain a big enough block. Search subsequent lists, if
    necessary. Terminate the search as soon as we find a block
    that is big enough.
 2. Use doubly-linked lists, so that we can remove free blocks
    in constant time.
 3. Use BSD LIST macros, as defined in sys/queue.h and the
    QUEUE(3) man page.

Signed-off-by: Robert Sanford <rsanford2@gmail.com>
---
 lib/librte_eal/common/include/rte_malloc_heap.h |    6 +-
 lib/librte_malloc/malloc_elem.c                 |  121 +++++++++++++++--------
 lib/librte_malloc/malloc_elem.h                 |   17 +++-
 lib/librte_malloc/malloc_heap.c                 |   67 ++++++-------
 4 files changed, 128 insertions(+), 83 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index 5e139cf..1f5d653 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -35,14 +35,18 @@
 #define _RTE_MALLOC_HEAP_H_
 
 #include <stddef.h>
+#include <sys/queue.h>
 #include <rte_spinlock.h>
 
+/* Number of free lists per heap, grouped by size. */
+#define RTE_HEAP_NUM_FREELISTS  5
+
 /**
  * Structure to hold malloc heap
  */
 struct malloc_heap {
 	rte_spinlock_t lock;
-	struct malloc_elem * volatile free_head;
+	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
 	unsigned mz_count;
 	unsigned alloc_count;
 	size_t total_size;
diff --git a/lib/librte_malloc/malloc_elem.c b/lib/librte_malloc/malloc_elem.c
index f0da640..13cd5d3 100644
--- a/lib/librte_malloc/malloc_elem.c
+++ b/lib/librte_malloc/malloc_elem.c
@@ -33,6 +33,7 @@
 #include <stdint.h>
 #include <stddef.h>
 #include <stdio.h>
+#include <string.h>
 #include <sys/queue.h>
 
 #include <rte_memory.h>
@@ -60,7 +61,8 @@ malloc_elem_init(struct malloc_elem *elem,
 {
 	elem->heap = heap;
 	elem->mz = mz;
-	elem->prev = elem->next_free = NULL;
+	elem->prev = NULL;
+	memset(&elem->free_list, 0, sizeof(elem->free_list));
 	elem->state = ELEM_FREE;
 	elem->size = size;
 	elem->pad = 0;
@@ -125,14 +127,71 @@ split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
 }
 
 /*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ *   heap->free_head[0] - (0   , 2^7]
+ *   heap->free_head[1] - (2^7 , 2^9]
+ *   heap->free_head[2] - (2^9 , 2^11]
+ *   heap->free_head[3] - (2^11, 2^13]
+ *   heap->free_head[4] - (2^13, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2   7
+#define MALLOC_LOG2_INCREMENT 2
+
+	size_t log2;
+	size_t index;
+
+	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+		return 0;
+
+	/* Find next power of 2 >= size. */
+	log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+	/* Compute freelist index, based on log2(size). */
+	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+	        MALLOC_LOG2_INCREMENT;
+
+	return (index <= RTE_HEAP_NUM_FREELISTS-1?
+	        index: RTE_HEAP_NUM_FREELISTS-1);
+}
+
+/*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+	size_t idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+
+	elem->state = ELEM_FREE;
+	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+static void
+elem_free_list_remove(struct malloc_elem *elem)
+{
+	LIST_REMOVE(elem, free_list);
+}
+
+/*
  * reserve a block of data in an existing malloc_elem. If the malloc_elem
  * is much larger than the data block requested, we split the element in two.
  * This function is only called from malloc_heap_alloc so parameter checking
  * is not done here, as it's done there previously.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-		unsigned align, struct malloc_elem *prev_free)
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
 {
 	struct malloc_elem *new_elem = elem_start_pt(elem, size, align);
 	const unsigned old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
@@ -151,20 +210,20 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size,
 			set_header(new_elem);
 		}
 		/* remove element from free list */
-		if (prev_free == NULL)
-			elem->heap->free_head = elem->next_free;
-		else
-			prev_free->next_free = elem->next_free;
+		elem_free_list_remove(elem);
 
 		return new_elem;
 	}
 
 	/* we are going to split the element in two. The original element
-	 * remains free, and the new element is the one allocated, so no free list
-	 * changes need to be made.
+	 * remains free, and the new element is the one allocated.
+	 * Re-insert original element, in case its new size makes it
+	 * belong on a different list.
 	 */
+	elem_free_list_remove(elem);
 	split_elem(elem, new_elem);
 	new_elem->state = ELEM_BUSY;
+	malloc_elem_free_list_insert(elem);
 
 	return new_elem;
 }
@@ -182,25 +241,6 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 }
 
 /*
- * scan the free list, and remove the request element from that
- * free list. (Free list to scan is got from heap pointer in element)
- */
-static inline void
-remove_from_free_list(struct malloc_elem *elem)
-{
-	if (elem == elem->heap->free_head)
-		elem->heap->free_head = elem->next_free;
-	else{
-		struct malloc_elem *prev_free = elem->heap->free_head;
-		while (prev_free && prev_free->next_free != elem)
-			prev_free = prev_free->next_free;
-		if (!prev_free)
-			rte_panic("Corrupted free list\n");
-		prev_free->next_free = elem->next_free;
-	}
-}
-
-/*
  * free a malloc_elem block by adding it to the free list. If the
  * blocks either immediately before or immediately after newly freed block
  * are also free, the blocks are merged together.
@@ -214,21 +254,22 @@ malloc_elem_free(struct malloc_elem *elem)
 	rte_spinlock_lock(&(elem->heap->lock));
 	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
 	if (next->state == ELEM_FREE){
-		/* join to this one, and remove from free list */
+		/* remove from free list, join to this one */
+		elem_free_list_remove(next);
 		join_elem(elem, next);
-		remove_from_free_list(next);
 	}
 
 	/* check if previous element is free, if so join with it and return,
-	 * no need to update free list, as that element is already there
+	 * need to re-insert in free list, as that element's size is changing
 	 */
-	if (elem->prev != NULL && elem->prev->state == ELEM_FREE)
+	if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
+		elem_free_list_remove(elem->prev);
 		join_elem(elem->prev, elem);
+		malloc_elem_free_list_insert(elem->prev);
+	}
 	/* otherwise add ourselves to the free list */
 	else {
-		elem->next_free = elem->heap->free_head;
-		elem->heap->free_head = elem;
-		elem->state = ELEM_FREE;
+		malloc_elem_free_list_insert(elem);
 		elem->pad = 0;
 	}
 	/* decrease heap's count of allocated elements */
@@ -258,20 +299,18 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
 	if (current_size + next->size < new_size)
 		goto err_return;
 
-	/* we now know the element fits, so join the two, then remove from free
-	 * list
+	/* we now know the element fits, so remove from free list,
+	 * join the two
 	 */
+	elem_free_list_remove(next);
 	join_elem(elem, next);
-	remove_from_free_list(next);
 
 	if (elem->size - new_size > MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){
 		/* now we have a big block together. Lets cut it down a bit, by splitting */
 		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
 		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, CACHE_LINE_SIZE);
 		split_elem(elem, split_pt);
-		split_pt->state = ELEM_FREE;
-		split_pt->next_free = elem->heap->free_head;
-		elem->heap->free_head = split_pt;
+		malloc_elem_free_list_insert(split_pt);
 	}
 	rte_spinlock_unlock(&elem->heap->lock);
 	return 0;
diff --git a/lib/librte_malloc/malloc_elem.h b/lib/librte_malloc/malloc_elem.h
index eadecf9..63c69e1 100644
--- a/lib/librte_malloc/malloc_elem.h
+++ b/lib/librte_malloc/malloc_elem.h
@@ -46,7 +46,7 @@ enum elem_state {
 struct malloc_elem {
 	struct malloc_heap *heap;
 	struct malloc_elem *volatile prev;      /* points to prev elem in memzone */
-	struct malloc_elem *volatile next_free; /* to make list of free elements */
+	LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
 	const struct rte_memzone *mz;
 	volatile enum elem_state state;
 	uint32_t pad;
@@ -156,8 +156,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align);
  * is much larger than the data block requested, we split the element in two.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-		unsigned align, struct malloc_elem *prev_free);
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align);
 
 /*
  * free a malloc_elem block by adding it to the free list. If the
@@ -174,4 +173,16 @@ malloc_elem_free(struct malloc_elem *elem);
 int
 malloc_elem_resize(struct malloc_elem *elem, size_t size);
 
+/*
+ * Given an element size, compute its freelist index.
+ */
+size_t
+malloc_elem_free_list_index(size_t size);
+
+/*
+ * Add element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem);
+
 #endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_malloc/malloc_heap.c b/lib/librte_malloc/malloc_heap.c
index 882749c..cfc02f1 100644
--- a/lib/librte_malloc/malloc_heap.c
+++ b/lib/librte_malloc/malloc_heap.c
@@ -114,9 +114,8 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
 	const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
 	malloc_elem_init(start_elem, heap, mz, elem_size);
 	malloc_elem_mkend(end_elem, start_elem);
+	malloc_elem_free_list_insert(start_elem);
 
-	start_elem->next_free = heap->free_head;
-	heap->free_head = start_elem;
 	/* increase heap total size by size of new memzone */
 	heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD;
 	return 0;
@@ -125,35 +124,25 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
 /*
  * Iterates through the freelist for a heap to find a free element
  * which can store data of the required size and with the requested alignment.
- * Returns null on failure, or pointer to element on success, with the pointer
- * to the previous element in the list, if any, being returned in a parameter
- * (to make removing the element from the free list faster).
+ * Returns null on failure, or pointer to element on success.
  */
 static struct malloc_elem *
-find_suitable_element(struct malloc_heap *heap, size_t size,
-		unsigned align, struct malloc_elem **prev)
+find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align)
 {
-	struct malloc_elem *elem, *min_elem, *min_prev;
-	size_t min_sz;
-
-	elem = heap->free_head;
-	min_elem = NULL;
-	min_prev = NULL;
-	min_sz = (size_t) SIZE_MAX;
-	*prev = NULL;
-
-	while(elem){
-		if (malloc_elem_can_hold(elem, size, align)) {
-			if (min_sz > elem->size) {
-				min_elem = elem;
-				*prev = min_prev;
-				min_sz = elem->size;
-			}
+	size_t idx;
+	struct malloc_elem *elem;
+
+	for (idx = malloc_elem_free_list_index(size);
+		idx < RTE_HEAP_NUM_FREELISTS; idx++)
+	{
+		for (elem = LIST_FIRST(&heap->free_head[idx]);
+			!!elem; elem = LIST_NEXT(elem, free_list))
+		{
+			if (malloc_elem_can_hold(elem, size, align))
+				return elem;
 		}
-		min_prev = elem;
-		elem = elem->next_free;
 	}
-	return (min_elem);
+	return NULL;
 }
 
 /*
@@ -169,15 +158,14 @@ malloc_heap_alloc(struct malloc_heap *heap,
 	size = CACHE_LINE_ROUNDUP(size);
 	align = CACHE_LINE_ROUNDUP(align);
 	rte_spinlock_lock(&heap->lock);
-	struct malloc_elem *prev, *elem = find_suitable_element(heap,
-			size, align, &prev);
+	struct malloc_elem *elem = find_suitable_element(heap, size, align);
 	if (elem == NULL){
 		if ((malloc_heap_add_memzone(heap, size, align)) == 0)
-			elem = find_suitable_element(heap, size, align, &prev);
+			elem = find_suitable_element(heap, size, align);
 	}
 
 	if (elem != NULL){
-		elem = malloc_elem_alloc(elem, size, align, prev);
+		elem = malloc_elem_alloc(elem, size, align);
 		/* increase heap's count of allocated elements */
 		heap->alloc_count++;
 	}
@@ -193,7 +181,8 @@ int
 malloc_heap_get_stats(const struct malloc_heap *heap,
 		struct rte_malloc_socket_stats *socket_stats)
 {
-	struct malloc_elem *elem = heap->free_head;
+	size_t idx;
+	struct malloc_elem *elem;
 
 	/* Initialise variables for heap */
 	socket_stats->free_count = 0;
@@ -201,13 +190,15 @@ malloc_heap_get_stats(const struct malloc_heap *heap,
 	socket_stats->greatest_free_size = 0;
 
 	/* Iterate through free list */
-	while(elem) {
-		socket_stats->free_count++;
-		socket_stats->heap_freesz_bytes += elem->size;
-		if (elem->size > socket_stats->greatest_free_size)
-			socket_stats->greatest_free_size = elem->size;
-
-		elem = elem->next_free;
+	for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+		for (elem = LIST_FIRST(&heap->free_head[idx]);
+			!!elem; elem = LIST_NEXT(elem, free_list))
+		{
+			socket_stats->free_count++;
+			socket_stats->heap_freesz_bytes += elem->size;
+			if (elem->size > socket_stats->greatest_free_size)
+				socket_stats->greatest_free_size = elem->size;
+		}
 	}
 	/* Get stats on overall heap and allocated memory on this heap */
 	socket_stats->heap_totalsz_bytes = heap->total_size;
-- 
1.7.1

             reply	other threads:[~2014-05-16 12:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-16 12:59 rsanford2 [this message]
2014-06-17 16:05 ` De Lara Guarch, Pablo
2014-06-17 16:29   ` Robert Sanford
2014-06-23 21:17 ` [dpdk-dev] [PATCH v3 0/2] " Robert Sanford
2014-06-24 14:55   ` De Lara Guarch, Pablo
2014-06-26 12:37     ` Thomas Monjalon
2014-06-23 21:17 ` [dpdk-dev] [PATCH v3 1/2] " Robert Sanford
2014-06-23 21:17 ` [dpdk-dev] [PATCH v3 2/2] " Robert Sanford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1400245141-10938-1-git-send-email-rsanford2@gmail.com \
    --to=rsanford2@gmail.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).