From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <rsanford@akamai.com>
Received: from prod-mail-xrelay08.akamai.com (prod-mail-xrelay08.akamai.com
 [96.6.114.112]) by dpdk.org (Postfix) with ESMTP id 4C69E5910
 for <dev@dpdk.org>; Mon, 21 Apr 2014 21:24:37 +0200 (CEST)
Received: from prod-mail-xrelay08.akamai.com (localhost.localdomain
 [127.0.0.1]) by postfix.imss70 (Postfix) with ESMTP id D5931481C5
 for <dev@dpdk.org>; Mon, 21 Apr 2014 19:24:38 +0000 (GMT)
Received: from prod-mail-relay08.akamai.com (unknown [172.27.22.71])
 by prod-mail-xrelay08.akamai.com (Postfix) with ESMTP id CA64B48114
 for <dev@dpdk.org>; Mon, 21 Apr 2014 19:24:38 +0000 (GMT)
Received: from ustx2ex-cashub.dfw01.corp.akamai.com
 (ustx2ex-cashub2.dfw01.corp.akamai.com [172.27.25.76])
 by prod-mail-relay08.akamai.com (Postfix) with ESMTP id C7C5198046
 for <dev@dpdk.org>; Mon, 21 Apr 2014 19:24:38 +0000 (GMT)
Received: from USMBX2.msg.corp.akamai.com ([169.254.1.44]) by
 ustx2ex-cashub2.dfw01.corp.akamai.com ([172.27.25.76]) with mapi; Mon, 21 Apr
 2014 14:24:38 -0500
From: "Sanford, Robert" <rsanford@akamai.com>
To: "dev@dpdk.org" <dev@dpdk.org>
Date: Mon, 21 Apr 2014 14:24:36 -0500
Thread-Topic: [PATCH] malloc: fix rte_free run time in O(n) free blocks
Thread-Index: Ac9dl1YBjarG0kgmRZi+R4FNy2igkQ==
Message-ID: <CF7ADF8B.B09%rsanford@akamai.com>
Accept-Language: en-US
Content-Language: en-US
X-MS-Has-Attach: 
X-MS-TNEF-Correlator: 
user-agent: Microsoft-MacOutlook/14.3.9.131030
acceptlanguage: en-US
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
Subject: [dpdk-dev] [PATCH] malloc: fix rte_free run time in O(n) free blocks
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Mon, 21 Apr 2014 19:24:38 -0000

Here is our proposed patch:

Lib rte_malloc stores free blocks in singly-linked lists.
This results in O(n), i.e., poor performance when freeing memory
to a heap that contains thousands of free blocks.

A simple solution is to store free blocks on doubly-linked lists.
- Space-wise, we add one new pointer to struct malloc_elem.
 This does not affect its size, since it is cache-aligned and
 currently has room for another pointer.
- We perform all free list adds and deletes in functions
 malloc_elem_add_to_free_list and remove_from_free_list.
- We clean up internal malloc functions that push or
 return pointers to previous blocks in the free list.


---
 lib/librte_malloc/malloc_elem.c |   82
+++++++++++++++++++++++---------------
 lib/librte_malloc/malloc_elem.h |    6 ++-
 lib/librte_malloc/malloc_heap.c |   19 +++------
 3 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/lib/librte_malloc/malloc_elem.c
b/lib/librte_malloc/malloc_elem.c
index f0da640..9a14e23 100644
--- a/lib/librte_malloc/malloc_elem.c
+++ b/lib/librte_malloc/malloc_elem.c
@@ -60,7 +60,7 @@ malloc_elem_init(struct malloc_elem *elem,
 {
 	elem->heap =3D heap;
 	elem->mz =3D mz;
-	elem->prev =3D elem->next_free =3D NULL;
+	elem->prev =3D elem->next_free =3D elem->prev_free =3D NULL;
 	elem->state =3D ELEM_FREE;
 	elem->size =3D size;
 	elem->pad =3D 0;
@@ -125,14 +125,58 @@ split_elem(struct malloc_elem *elem, struct
malloc_elem *split_pt)
 }
=20
 /*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_add_to_free_list(struct malloc_elem *elem)
+{
+	elem->state =3D ELEM_FREE;
+	elem->prev_free =3D NULL;
+	if (!!(elem->next_free =3D elem->heap->free_head))
+		elem->next_free->prev_free =3D elem;
+	elem->heap->free_head =3D elem;
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+static inline void
+remove_from_free_list(struct malloc_elem *elem)
+{
+	struct malloc_elem *next_free_elem =3D elem->next_free;
+	struct malloc_elem *prev_free_elem =3D elem->prev_free;
+
+	if (!!next_free_elem) {
+#ifdef RTE_LIBRTE_MALLOC_DEBUG
+		RTE_VERIFY(next_free_elem->prev_free =3D=3D elem);
+#endif
+		next_free_elem->prev_free =3D prev_free_elem;
+	}
+
+	if (!!prev_free_elem) {
+#ifdef RTE_LIBRTE_MALLOC_DEBUG
+		RTE_VERIFY(prev_free_elem->next_free =3D=3D elem);
+#endif
+		prev_free_elem->next_free =3D next_free_elem;
+	}
+	else {
+		struct malloc_heap *heap =3D elem->heap;
+
+#ifdef RTE_LIBRTE_MALLOC_DEBUG
+		RTE_VERIFY(heap->free_head =3D=3D elem);
+#endif
+		heap->free_head =3D next_free_elem;
+	}
+}
+
+/*
  * reserve a block of data in an existing malloc_elem. If the malloc_elem
  * is much larger than the data block requested, we split the element in
two.
  * This function is only called from malloc_heap_alloc so parameter
checking
  * is not done here, as it's done there previously.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-		unsigned align, struct malloc_elem *prev_free)
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
 {
 	struct malloc_elem *new_elem =3D elem_start_pt(elem, size, align);
 	const unsigned old_elem_size =3D (uintptr_t)new_elem - (uintptr_t)elem;
@@ -151,10 +195,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t
size,
 			set_header(new_elem);
 		}
 		/* remove element from free list */
-		if (prev_free =3D=3D NULL)
-			elem->heap->free_head =3D elem->next_free;
-		else
-			prev_free->next_free =3D elem->next_free;
+		remove_from_free_list(elem);
=20
 		return new_elem;
 	}
@@ -182,25 +223,6 @@ join_elem(struct malloc_elem *elem1, struct
malloc_elem *elem2)
 }
=20
 /*
- * scan the free list, and remove the request element from that
- * free list. (Free list to scan is got from heap pointer in element)
- */
-static inline void
-remove_from_free_list(struct malloc_elem *elem)
-{
-	if (elem =3D=3D elem->heap->free_head)
-		elem->heap->free_head =3D elem->next_free;
-	else{
-		struct malloc_elem *prev_free =3D elem->heap->free_head;
-		while (prev_free && prev_free->next_free !=3D elem)
-			prev_free =3D prev_free->next_free;
-		if (!prev_free)
-			rte_panic("Corrupted free list\n");
-		prev_free->next_free =3D elem->next_free;
-	}
-}
-
-/*
  * free a malloc_elem block by adding it to the free list. If the
  * blocks either immediately before or immediately after newly freed block
  * are also free, the blocks are merged together.
@@ -226,9 +248,7 @@ malloc_elem_free(struct malloc_elem *elem)
 		join_elem(elem->prev, elem);
 	/* otherwise add ourselves to the free list */
 	else {
-		elem->next_free =3D elem->heap->free_head;
-		elem->heap->free_head =3D elem;
-		elem->state =3D ELEM_FREE;
+		malloc_elem_add_to_free_list(elem);
 		elem->pad =3D 0;
 	}
 	/* decrease heap's count of allocated elements */
@@ -269,9 +289,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t
size)
 		struct malloc_elem *split_pt =3D RTE_PTR_ADD(elem, new_size);
 		split_pt =3D RTE_PTR_ALIGN_CEIL(split_pt, CACHE_LINE_SIZE);
 		split_elem(elem, split_pt);
-		split_pt->state =3D ELEM_FREE;
-		split_pt->next_free =3D elem->heap->free_head;
-		elem->heap->free_head =3D split_pt;
+		malloc_elem_add_to_free_list(split_pt);
 	}
 	rte_spinlock_unlock(&elem->heap->lock);
 	return 0;
diff --git a/lib/librte_malloc/malloc_elem.h
b/lib/librte_malloc/malloc_elem.h
index eadecf9..6d2d7db 100644
--- a/lib/librte_malloc/malloc_elem.h
+++ b/lib/librte_malloc/malloc_elem.h
@@ -47,6 +47,7 @@ struct malloc_elem {
 	struct malloc_heap *heap;
 	struct malloc_elem *volatile prev;      /* points to prev elem in
memzone */
 	struct malloc_elem *volatile next_free; /* to make list of free elements
*/
+	struct malloc_elem *volatile prev_free; /* make free list doubly-linked
*/
 	const struct rte_memzone *mz;
 	volatile enum elem_state state;
 	uint32_t pad;
@@ -156,8 +157,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t
size, unsigned align);
  * is much larger than the data block requested, we split the element in
two.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-		unsigned align, struct malloc_elem *prev_free);
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align);
=20
 /*
  * free a malloc_elem block by adding it to the free list. If the
@@ -174,4 +174,6 @@ malloc_elem_free(struct malloc_elem *elem);
 int
 malloc_elem_resize(struct malloc_elem *elem, size_t size);
=20
+void malloc_elem_add_to_free_list(struct malloc_elem *elem);
+
 #endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_malloc/malloc_heap.c
b/lib/librte_malloc/malloc_heap.c
index f4a0294..bc146e1 100644
--- a/lib/librte_malloc/malloc_heap.c
+++ b/lib/librte_malloc/malloc_heap.c
@@ -112,9 +112,8 @@ malloc_heap_add_memzone(struct malloc_heap *heap,
size_t size, unsigned align)
 	const unsigned elem_size =3D (uintptr_t)end_elem - (uintptr_t)start_elem;
 	malloc_elem_init(start_elem, heap, mz, elem_size);
 	malloc_elem_mkend(end_elem, start_elem);
+	malloc_elem_add_to_free_list(start_elem);
=20
-	start_elem->next_free =3D heap->free_head;
-	heap->free_head =3D start_elem;
 	/* increase heap total size by size of new memzone */
 	heap->total_size+=3Dmz_size - MALLOC_ELEM_OVERHEAD;
 	return 0;
@@ -160,27 +159,22 @@ malloc_heap_init(struct malloc_heap *heap)
  * (to make removing the element from the free list faster).
  */
 static struct malloc_elem *
-find_suitable_element(struct malloc_heap *heap, size_t size,
-		unsigned align, struct malloc_elem **prev)
+find_suitable_element(struct malloc_heap *heap, size_t size, unsigned
align)
 {
-	struct malloc_elem *elem, *min_elem, *min_prev;
+	struct malloc_elem *elem, *min_elem;
 	size_t min_sz;
=20
 	elem =3D heap->free_head;
 	min_elem =3D NULL;
-	min_prev =3D NULL;
 	min_sz =3D (size_t) SIZE_MAX;
-	*prev =3D NULL;
=20
 	while(elem){
 		if (malloc_elem_can_hold(elem, size, align)) {
 			if (min_sz > elem->size) {
 				min_elem =3D elem;
-				*prev =3D min_prev;
 				min_sz =3D elem->size;
 			}
 		}
-		min_prev =3D elem;
 		elem =3D elem->next_free;
 	}
 	return (min_elem);
@@ -202,15 +196,14 @@ malloc_heap_alloc(struct malloc_heap *heap,
 	size =3D CACHE_LINE_ROUNDUP(size);
 	align =3D CACHE_LINE_ROUNDUP(align);
 	rte_spinlock_lock(&heap->lock);
-	struct malloc_elem *prev, *elem =3D find_suitable_element(heap,
-			size, align, &prev);
+	struct malloc_elem *elem =3D find_suitable_element(heap, size, align);
 	if (elem =3D=3D NULL){
 		if ((malloc_heap_add_memzone(heap, size, align)) =3D=3D 0)
-			elem =3D find_suitable_element(heap, size, align, &prev);
+			elem =3D find_suitable_element(heap, size, align);
 	}
=20
 	if (elem !=3D NULL){
-		elem =3D malloc_elem_alloc(elem, size, align, prev);
+		elem =3D malloc_elem_alloc(elem, size, align);
 		/* increase heap's count of allocated elements */
 		heap->alloc_count++;
 	}
--=20
1.7.1

Signed-off-by: Robert Sanford <rsanford@akamai.com>