From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com,
	keith.wiles@intel.com, benjamin.walker@intel.com,
	bruce.richardson@intel.com, thomas@monjalon.net
Subject: [dpdk-dev] [RFC 06/23] eal/malloc: make malloc a doubly-linked list
Date: Tue, 19 Dec 2017 11:04:27 +0000
Message-ID: <b1534e04ab11fa6278ed7eba16b734ca641832b7.1513594170.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1513594170.git.anatoly.burakov@intel.com>

In preparation for dynamic memory allocation, we need to be able to
handle holes in our malloc heap. Switch the heap over to a
doubly-linked list of elements, and prepare the infrastructure to
support it.
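
In outline, insertion keeps the element list sorted by address and
walks towards the insertion point from whichever end of the heap is
closer. A simplified sketch of the approach (illustrative only; the
actual malloc_elem_insert() below also walks backwards from the tail
when that end is closer):

    static void
    insert_sketch(struct malloc_heap *heap, struct malloc_elem *elem)
    {
        struct malloc_elem *prev = NULL, *next = NULL;

        if (heap->first == NULL) {
            /* empty heap */
            heap->first = heap->last = elem;
        } else if (elem < heap->first) {
            /* lowest address so far becomes the new head */
            next = heap->first;
            heap->first = elem;
        } else if (elem > heap->last) {
            /* highest address so far becomes the new tail */
            prev = heap->last;
            heap->last = elem;
        } else {
            /* walk the list to find the insertion point */
            prev = heap->first;
            while (prev->next < elem)
                prev = prev->next;
            next = prev->next;
        }
        elem->prev = prev;
        elem->next = next;
        if (prev != NULL)
            prev->next = elem;
        if (next != NULL)
            next->prev = elem;
    }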

Since the heap now tracks its first and last elements, there is no
longer any need for a dummy element at the end of each heap, so get
rid of it. Instead, let the insert/remove/join/split operations
handle end-of-list conditions automatically.
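
Concretely, removal now maintains the heap's first/last pointers
instead of relying on a sentinel, and, because the list may contain
holes, joins must verify that list neighbours are actually contiguous
in memory. Sketches of both, condensed from the helpers added by this
patch:

    /* unlink elem, updating the heap's head/tail as needed */
    if (elem->next != NULL)
        elem->next->prev = elem->prev;
    else
        elem->heap->last = elem->prev;   /* elem was the tail */
    if (elem->prev != NULL)
        elem->prev->next = elem->next;
    else
        elem->heap->first = elem->next;  /* elem was the head */

    /* list neighbours are no longer guaranteed to be adjacent */
    next_is_adjacent = (elem->next == RTE_PTR_ADD(elem, elem->size));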

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/include/rte_malloc_heap.h |   6 +
 lib/librte_eal/common/malloc_elem.c             | 196 +++++++++++++++++++-----
 lib/librte_eal/common/malloc_elem.h             |   7 +-
 lib/librte_eal/common/malloc_heap.c             |   8 +-
 4 files changed, 170 insertions(+), 47 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index b270356..48a46c9 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -42,12 +42,18 @@
 /* Number of free lists per heap, grouped by size. */
 #define RTE_HEAP_NUM_FREELISTS  13
 
+/* dummy definition, for pointers */
+struct malloc_elem;
+
 /**
  * Structure to hold malloc heap
  */
 struct malloc_heap {
 	rte_spinlock_t lock;
 	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+	struct malloc_elem *first;
+	struct malloc_elem *last;
+
 	unsigned alloc_count;
 	size_t total_size;
 } __rte_cache_aligned;
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 6b4f2a5..7609a9b 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -60,6 +60,7 @@ malloc_elem_init(struct malloc_elem *elem,
 	elem->heap = heap;
 	elem->ms = ms;
 	elem->prev = NULL;
+	elem->next = NULL;
 	memset(&elem->free_list, 0, sizeof(elem->free_list));
 	elem->state = ELEM_FREE;
 	elem->size = size;
@@ -68,15 +69,56 @@ malloc_elem_init(struct malloc_elem *elem,
 	set_trailer(elem);
 }
 
-/*
- * Initialize a dummy malloc_elem header for the end-of-memseg marker
- */
 void
-malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
+malloc_elem_insert(struct malloc_elem *elem)
 {
-	malloc_elem_init(elem, prev->heap, prev->ms, 0);
-	elem->prev = prev;
-	elem->state = ELEM_BUSY; /* mark busy so its never merged */
+	struct malloc_elem *prev_elem, *next_elem;
+	struct malloc_heap *heap = elem->heap;
+
+	if (heap->first == NULL && heap->last == NULL) {
+		/* if empty heap */
+		heap->first = elem;
+		heap->last = elem;
+		prev_elem = NULL;
+		next_elem = NULL;
+	} else if (elem < heap->first) {
+		/* if lower than start */
+		prev_elem = NULL;
+		next_elem = heap->first;
+		heap->first = elem;
+	} else if (elem > heap->last) {
+		/* if higher than end */
+		prev_elem = heap->last;
+		next_elem = NULL;
+		heap->last = elem;
+	} else {
+	/* the new memory is somewhere in between start and end */
+		uint64_t dist_from_start, dist_from_end;
+
+		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
+		dist_from_start = RTE_PTR_DIFF(elem, heap->first);
+
+		/* check which is closer, and find closest list entries */
+		if (dist_from_start < dist_from_end) {
+			prev_elem = heap->first;
+			while (prev_elem->next < elem)
+				prev_elem = prev_elem->next;
+			next_elem = prev_elem->next;
+		} else {
+			next_elem = heap->last;
+			while (next_elem->prev > elem)
+				next_elem = next_elem->prev;
+			prev_elem = next_elem->prev;
+		}
+	}
+
+	/* insert new element */
+	elem->prev = prev_elem;
+	elem->next = next_elem;
+	if (prev_elem)
+		prev_elem->next = elem;
+	if (next_elem)
+		next_elem->prev = elem;
 }
 
 /*
@@ -126,18 +168,55 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size,	unsigned align,
 static void
 split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
 {
-	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+	struct malloc_elem *next_elem = elem->next;
 	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
 	const size_t new_elem_size = elem->size - old_elem_size;
 
 	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
 	split_pt->prev = elem;
-	next_elem->prev = split_pt;
+	split_pt->next = next_elem;
+	if (next_elem)
+		next_elem->prev = split_pt;
+	else
+		elem->heap->last = split_pt;
+	elem->next = split_pt;
 	elem->size = old_elem_size;
 	set_trailer(elem);
 }
 
 /*
+ * our malloc heap is a doubly-linked list, so unlink the element from
+ * both of its neighbours.
+ */
+static void __rte_unused
+remove_elem(struct malloc_elem *elem)
+{
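+	/* not yet called in this patch; __rte_unused quiets the warning */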
+	struct malloc_elem *next, *prev;
+	next = elem->next;
+	prev = elem->prev;
+
+	if (next)
+		next->prev = prev;
+	else
+		elem->heap->last = prev;
+	if (prev)
+		prev->next = next;
+	else
+		elem->heap->first = next;
+
+	elem->prev = NULL;
+	elem->next = NULL;
+}
+
+static int
+next_elem_is_adjacent(struct malloc_elem *elem)
+{
+	return elem->next == RTE_PTR_ADD(elem, elem->size);
+}
+
+static int
+prev_elem_is_adjacent(struct malloc_elem *elem)
+{
+	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size);
+}
+
+/*
  * Given an element size, compute its freelist index.
  * We free an element into the freelist containing similarly-sized elements.
  * We try to allocate elements starting with the freelist containing
@@ -220,6 +299,9 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
 
 		split_elem(elem, new_free_elem);
 		malloc_elem_free_list_insert(new_free_elem);
+
+		if (elem == elem->heap->last)
+			elem->heap->last = new_free_elem;
 	}
 
 	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
@@ -258,9 +340,61 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
 static inline void
 join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 {
-	struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
+	struct malloc_elem *next = elem2->next;
 	elem1->size += elem2->size;
-	next->prev = elem1;
+	if (next)
+		next->prev = elem1;
+	else
+		elem1->heap->last = elem1;
+	elem1->next = next;
+}
+
+static struct malloc_elem *
+elem_join_adjacent_free(struct malloc_elem *elem)
+{
+	/*
+	 * check if the next element exists, is adjacent and is free; if so,
+	 * remove it from the free list and join it with this one.
+	 */
+	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
+			next_elem_is_adjacent(elem)) {
+		void *erase;
+
+		/* we will want to erase the trailer and header */
+		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+
+		/* remove from free list, join to this one */
+		elem_free_list_remove(elem->next);
+		join_elem(elem, elem->next);
+
+		/* erase header and trailer */
+		memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+	}
+
+	/*
+	 * check if the prev element exists, is adjacent and is free; if so,
+	 * remove it from the free list and join it with this one.
+	 */
+	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
+			prev_elem_is_adjacent(elem)) {
+		struct malloc_elem *new_elem;
+		void *erase;
+
+		/* we will want to erase trailer and header */
+		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+
+		/* remove from free list, join to this one */
+		elem_free_list_remove(elem->prev);
+
+		new_elem = elem->prev;
+		join_elem(new_elem, elem);
+
+		/* erase header and trailer */
+		memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+
+		elem = new_elem;
+	}
+
+	return elem;
 }
 
 /*
@@ -271,32 +405,20 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 int
 malloc_elem_free(struct malloc_elem *elem)
 {
-	size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
-	uint8_t *ptr = (uint8_t *)&elem[1];
-	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
-	if (next->state == ELEM_FREE){
-		/* remove from free list, join to this one */
-		elem_free_list_remove(next);
-		join_elem(elem, next);
-		sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
-	}
+	void *ptr;
+	size_t data_len;
+
+	ptr = RTE_PTR_ADD(elem, sizeof(*elem));
+	data_len = elem->size - MALLOC_ELEM_OVERHEAD;
+
+	elem = elem_join_adjacent_free(elem);
 
-	/* check if previous element is free, if so join with it and return,
-	 * need to re-insert in free list, as that element's size is changing
-	 */
-	if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
-		elem_free_list_remove(elem->prev);
-		join_elem(elem->prev, elem);
-		sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
-		ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
-		elem = elem->prev;
-	}
 	malloc_elem_free_list_insert(elem);
 
 	/* decrease heap's count of allocated elements */
 	elem->heap->alloc_count--;
 
-	memset(ptr, 0, sz);
+	memset(ptr, 0, data_len);
 
 	return 0;
 }
@@ -309,21 +431,23 @@ int
 malloc_elem_resize(struct malloc_elem *elem, size_t size)
 {
 	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
 	/* if we request a smaller size, then always return ok */
 	if (elem->size >= new_size)
 		return 0;
 
-	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
-	if (next ->state != ELEM_FREE)
+	/* check if there is a next element, it's free and adjacent */
+	if (!elem->next || elem->next->state != ELEM_FREE ||
+			!next_elem_is_adjacent(elem))
 		return -1;
-	if (elem->size + next->size < new_size)
+	if (elem->size + elem->next->size < new_size)
 		return -1;
 
 	/* we now know the element fits, so remove from free list,
 	 * join the two
 	 */
-	elem_free_list_remove(next);
-	join_elem(elem, next);
+	elem_free_list_remove(elem->next);
+	join_elem(elem, elem->next);
 
 	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
 		/* now we have a big block together. Lets cut it down a bit, by splitting */
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index ce39129..b3d39c0 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -48,6 +48,7 @@ enum elem_state {
 struct malloc_elem {
 	struct malloc_heap *heap;
 	struct malloc_elem *volatile prev;      /* points to prev elem in memseg */
+	struct malloc_elem *volatile next;      /* points to next elem in memseg */
 	LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
 	const struct rte_memseg *ms;
 	volatile enum elem_state state;
@@ -139,12 +140,8 @@ malloc_elem_init(struct malloc_elem *elem,
 		const struct rte_memseg *ms,
 		size_t size);
 
-/*
- * initialise a dummy malloc_elem header for the end-of-memseg marker
- */
 void
-malloc_elem_mkend(struct malloc_elem *elem,
-		struct malloc_elem *prev_free);
+malloc_elem_insert(struct malloc_elem *elem);
 
 /*
  * return true if the current malloc_elem can hold a block of data
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index b3a1043..1b35468 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -99,15 +99,11 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
 static void
 malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
 {
-	/* allocate the memory block headers, one at end, one at start */
 	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
-	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
-			ms->len - MALLOC_ELEM_OVERHEAD);
-	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
-	const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
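+	/* no end-of-memseg marker any more; the whole memseg, minus
+	 * element overhead, becomes a single free element */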
+	const size_t elem_size = ms->len - MALLOC_ELEM_OVERHEAD;
 
 	malloc_elem_init(start_elem, heap, ms, elem_size);
-	malloc_elem_mkend(end_elem, start_elem);
+	malloc_elem_insert(start_elem);
 	malloc_elem_free_list_insert(start_elem);
 
 	heap->total_size += elem_size;
-- 
2.7.4
