From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <aburakov@ecsmtp.ir.intel.com>
Received: from mga05.intel.com (mga05.intel.com [192.55.52.43])
 by dpdk.org (Postfix) with ESMTP id 4BC051BB8F
 for <dev@dpdk.org>; Wed, 11 Apr 2018 14:30:52 +0200 (CEST)
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;
 11 Apr 2018 05:30:51 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.48,436,1517904000"; d="scan'208";a="46047716"
Received: from irvmail001.ir.intel.com ([163.33.26.43])
 by fmsmga001.fm.intel.com with ESMTP; 11 Apr 2018 05:30:46 -0700
Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com
 [10.237.217.45])
 by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id
 w3BCUkNc012287; Wed, 11 Apr 2018 13:30:46 +0100
Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1])
 by sivswdev01.ir.intel.com with ESMTP id w3BCUkp0013517;
 Wed, 11 Apr 2018 13:30:46 +0100
Received: (from aburakov@localhost)
 by sivswdev01.ir.intel.com with LOCAL id w3BCUkK1013513;
 Wed, 11 Apr 2018 13:30:46 +0100
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com, 
 laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,
 bruce.richardson@intel.com, thomas@monjalon.net,
 konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com,
 louise.m.daly@intel.com, nelio.laranjeiro@6wind.com,
 yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com,
 hemant.agrawal@nxp.com, olivier.matz@6wind.com, shreyansh.jain@nxp.com,
 gowrishankar.m@linux.vnet.ibm.com
Date: Wed, 11 Apr 2018 13:29:38 +0100
Message-Id: <03ea3916103bae4b8e08ebad6f65d669f7ac2a81.1523448978.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 1.7.0.7
In-Reply-To: <cover.1523448978.git.anatoly.burakov@intel.com>
References: <cover.1523448978.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1523448978.git.anatoly.burakov@intel.com>
References: <cover.1523296700.git.anatoly.burakov@intel.com>
 <cover.1523448978.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [PATCH v6 03/70] malloc: make heap a doubly-linked list
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Wed, 11 Apr 2018 12:30:53 -0000

As we are preparing for dynamic memory allocation, we need to be
able to handle holes in our malloc heap, hence we're switching to
doubly linked list, and prepare infrastructure to support it.

Since our heap is now aware where are our first and last elements,
there is no longer any need to have a dummy element at the end of
each heap, so get rid of that as well. Instead, let insert/remove/
join/split operations handle end-of-list conditions automatically.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
---
 lib/librte_eal/common/include/rte_malloc_heap.h |   6 +
 lib/librte_eal/common/malloc_elem.c             | 200 +++++++++++++++++++-----
 lib/librte_eal/common/malloc_elem.h             |  14 +-
 lib/librte_eal/common/malloc_heap.c             |   8 +-
 4 files changed, 179 insertions(+), 49 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index ba99ed9..d43fa90 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -13,12 +13,18 @@
 /* Number of free lists per heap, grouped by size. */
 #define RTE_HEAP_NUM_FREELISTS  13
 
+/* dummy definition, for pointers */
+struct malloc_elem;
+
 /**
  * Structure to hold malloc heap
  */
 struct malloc_heap {
 	rte_spinlock_t lock;
 	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+	struct malloc_elem *volatile first;
+	struct malloc_elem *volatile last;
+
 	unsigned alloc_count;
 	size_t total_size;
 } __rte_cache_aligned;
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index ea041e2..eb41200 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -31,6 +31,7 @@ malloc_elem_init(struct malloc_elem *elem,
 	elem->heap = heap;
 	elem->ms = ms;
 	elem->prev = NULL;
+	elem->next = NULL;
 	memset(&elem->free_list, 0, sizeof(elem->free_list));
 	elem->state = ELEM_FREE;
 	elem->size = size;
@@ -39,15 +40,56 @@ malloc_elem_init(struct malloc_elem *elem,
 	set_trailer(elem);
 }
 
-/*
- * Initialize a dummy malloc_elem header for the end-of-memseg marker
- */
 void
-malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
+malloc_elem_insert(struct malloc_elem *elem)
 {
-	malloc_elem_init(elem, prev->heap, prev->ms, 0);
-	elem->prev = prev;
-	elem->state = ELEM_BUSY; /* mark busy so its never merged */
+	struct malloc_elem *prev_elem, *next_elem;
+	struct malloc_heap *heap = elem->heap;
+
+	if (heap->first == NULL && heap->last == NULL) {
+		/* if empty heap */
+		heap->first = elem;
+		heap->last = elem;
+		prev_elem = NULL;
+		next_elem = NULL;
+	} else if (elem < heap->first) {
+		/* if lower than start */
+		prev_elem = NULL;
+		next_elem = heap->first;
+		heap->first = elem;
+	} else if (elem > heap->last) {
+		/* if higher than end */
+		prev_elem = heap->last;
+		next_elem = NULL;
+		heap->last = elem;
+	} else {
+		/* the new memory is somewhere inbetween start and end */
+		uint64_t dist_from_start, dist_from_end;
+
+		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
+		dist_from_start = RTE_PTR_DIFF(elem, heap->first);
+
+		/* check which is closer, and find closest list entries */
+		if (dist_from_start < dist_from_end) {
+			prev_elem = heap->first;
+			while (prev_elem->next < elem)
+				prev_elem = prev_elem->next;
+			next_elem = prev_elem->next;
+		} else {
+			next_elem = heap->last;
+			while (next_elem->prev > elem)
+				next_elem = next_elem->prev;
+			prev_elem = next_elem->prev;
+		}
+	}
+
+	/* insert new element */
+	elem->prev = prev_elem;
+	elem->next = next_elem;
+	if (prev_elem)
+		prev_elem->next = elem;
+	if (next_elem)
+		next_elem->prev = elem;
 }
 
 /*
@@ -98,18 +140,58 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size,	unsigned align,
 static void
 split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
 {
-	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+	struct malloc_elem *next_elem = elem->next;
 	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
 	const size_t new_elem_size = elem->size - old_elem_size;
 
 	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
 	split_pt->prev = elem;
-	next_elem->prev = split_pt;
+	split_pt->next = next_elem;
+	if (next_elem)
+		next_elem->prev = split_pt;
+	else
+		elem->heap->last = split_pt;
+	elem->next = split_pt;
 	elem->size = old_elem_size;
 	set_trailer(elem);
 }
 
 /*
+ * our malloc heap is a doubly linked list, so doubly remove our element.
+ */
+static void __rte_unused
+remove_elem(struct malloc_elem *elem)
+{
+	struct malloc_elem *next, *prev;
+	next = elem->next;
+	prev = elem->prev;
+
+	if (next)
+		next->prev = prev;
+	else
+		elem->heap->last = prev;
+	if (prev)
+		prev->next = next;
+	else
+		elem->heap->first = next;
+
+	elem->prev = NULL;
+	elem->next = NULL;
+}
+
+static int
+next_elem_is_adjacent(struct malloc_elem *elem)
+{
+	return elem->next == RTE_PTR_ADD(elem, elem->size);
+}
+
+static int
+prev_elem_is_adjacent(struct malloc_elem *elem)
+{
+	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size);
+}
+
+/*
  * Given an element size, compute its freelist index.
  * We free an element into the freelist containing similarly-sized elements.
  * We try to allocate elements starting with the freelist containing
@@ -192,6 +274,9 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
 
 		split_elem(elem, new_free_elem);
 		malloc_elem_free_list_insert(new_free_elem);
+
+		if (elem == elem->heap->last)
+			elem->heap->last = new_free_elem;
 	}
 
 	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
@@ -230,9 +315,62 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
 static inline void
 join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 {
-	struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
+	struct malloc_elem *next = elem2->next;
 	elem1->size += elem2->size;
-	next->prev = elem1;
+	if (next)
+		next->prev = elem1;
+	else
+		elem1->heap->last = elem1;
+	elem1->next = next;
+}
+
+static struct malloc_elem *
+elem_join_adjacent_free(struct malloc_elem *elem)
+{
+	/*
+	 * check if next element exists, is adjacent and is free, if so join
+	 * with it, need to remove from free list.
+	 */
+	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
+			next_elem_is_adjacent(elem)) {
+		void *erase;
+
+		/* we will want to erase the trailer and header */
+		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+
+		/* remove from free list, join to this one */
+		elem_free_list_remove(elem->next);
+		join_elem(elem, elem->next);
+
+		/* erase header and trailer */
+		memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+	}
+
+	/*
+	 * check if prev element exists, is adjacent and is free, if so join
+	 * with it, need to remove from free list.
+	 */
+	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
+			prev_elem_is_adjacent(elem)) {
+		struct malloc_elem *new_elem;
+		void *erase;
+
+		/* we will want to erase trailer and header */
+		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+
+		/* remove from free list, join to this one */
+		elem_free_list_remove(elem->prev);
+
+		new_elem = elem->prev;
+		join_elem(new_elem, elem);
+
+		/* erase header and trailer */
+		memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+
+		elem = new_elem;
+	}
+
+	return elem;
 }
 
 /*
@@ -243,32 +381,20 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 int
 malloc_elem_free(struct malloc_elem *elem)
 {
-	size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
-	uint8_t *ptr = (uint8_t *)&elem[1];
-	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
-	if (next->state == ELEM_FREE){
-		/* remove from free list, join to this one */
-		elem_free_list_remove(next);
-		join_elem(elem, next);
-		sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
-	}
+	void *ptr;
+	size_t data_len;
+
+	ptr = RTE_PTR_ADD(elem, sizeof(*elem));
+	data_len = elem->size - MALLOC_ELEM_OVERHEAD;
+
+	elem = elem_join_adjacent_free(elem);
 
-	/* check if previous element is free, if so join with it and return,
-	 * need to re-insert in free list, as that element's size is changing
-	 */
-	if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
-		elem_free_list_remove(elem->prev);
-		join_elem(elem->prev, elem);
-		sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
-		ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
-		elem = elem->prev;
-	}
 	malloc_elem_free_list_insert(elem);
 
 	/* decrease heap's count of allocated elements */
 	elem->heap->alloc_count--;
 
-	memset(ptr, 0, sz);
+	memset(ptr, 0, data_len);
 
 	return 0;
 }
@@ -281,21 +407,23 @@ int
 malloc_elem_resize(struct malloc_elem *elem, size_t size)
 {
 	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
 	/* if we request a smaller size, then always return ok */
 	if (elem->size >= new_size)
 		return 0;
 
-	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
-	if (next ->state != ELEM_FREE)
+	/* check if there is a next element, it's free and adjacent */
+	if (!elem->next || elem->next->state != ELEM_FREE ||
+			!next_elem_is_adjacent(elem))
 		return -1;
-	if (elem->size + next->size < new_size)
+	if (elem->size + elem->next->size < new_size)
 		return -1;
 
 	/* we now know the element fits, so remove from free list,
 	 * join the two
 	 */
-	elem_free_list_remove(next);
-	join_elem(elem, next);
+	elem_free_list_remove(elem->next);
+	join_elem(elem, elem->next);
 
 	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
 		/* now we have a big block together. Lets cut it down a bit, by splitting */
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index f4c1c7a..238e451 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -18,8 +18,12 @@ enum elem_state {
 
 struct malloc_elem {
 	struct malloc_heap *heap;
-	struct malloc_elem *volatile prev;      /* points to prev elem in memseg */
-	LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
+	struct malloc_elem *volatile prev;
+	/**< points to prev elem in memseg */
+	struct malloc_elem *volatile next;
+	/**< points to next elem in memseg */
+	LIST_ENTRY(malloc_elem) free_list;
+	/**< list of free elements in heap */
 	const struct rte_memseg *ms;
 	volatile enum elem_state state;
 	uint32_t pad;
@@ -110,12 +114,8 @@ malloc_elem_init(struct malloc_elem *elem,
 		const struct rte_memseg *ms,
 		size_t size);
 
-/*
- * initialise a dummy malloc_elem header for the end-of-memseg marker
- */
 void
-malloc_elem_mkend(struct malloc_elem *elem,
-		struct malloc_elem *prev_free);
+malloc_elem_insert(struct malloc_elem *elem);
 
 /*
  * return true if the current malloc_elem can hold a block of data
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 7d8d70a..9c95166 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -70,15 +70,11 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
 static void
 malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
 {
-	/* allocate the memory block headers, one at end, one at start */
 	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
-	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
-			ms->len - MALLOC_ELEM_OVERHEAD);
-	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
-	const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
+	const size_t elem_size = ms->len - MALLOC_ELEM_OVERHEAD;
 
 	malloc_elem_init(start_elem, heap, ms, elem_size);
-	malloc_elem_mkend(end_elem, start_elem);
+	malloc_elem_insert(start_elem);
 	malloc_elem_free_list_insert(start_elem);
 
 	heap->total_size += elem_size;
-- 
2.7.4