From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from prod-mail-xrelay06.akamai.com (prod-mail-xrelay06.akamai.com [96.6.114.98]) by dpdk.org (Postfix) with ESMTP id E4AEF532D for ; Thu, 8 May 2014 23:17:15 +0200 (CEST) Received: from prod-mail-xrelay06.akamai.com (localhost.localdomain [127.0.0.1]) by postfix.imss70 (Postfix) with ESMTP id E725D1655D8 for ; Thu, 8 May 2014 21:17:21 +0000 (GMT) Received: from prod-mail-relay08.akamai.com (unknown [172.27.22.71]) by prod-mail-xrelay06.akamai.com (Postfix) with ESMTP id DC2571655BE for ; Thu, 8 May 2014 21:17:21 +0000 (GMT) Received: from ustx2ex-cashub.dfw01.corp.akamai.com (ustx2ex-cashub2.dfw01.corp.akamai.com [172.27.25.76]) by prod-mail-relay08.akamai.com (Postfix) with ESMTP id D948298055 for ; Thu, 8 May 2014 21:17:21 +0000 (GMT) Received: from USMBX2.msg.corp.akamai.com ([169.254.1.21]) by ustx2ex-cashub2.dfw01.corp.akamai.com ([172.27.25.76]) with mapi; Thu, 8 May 2014 16:17:21 -0500 From: "Sanford, Robert" To: "dev@dpdk.org" Date: Thu, 8 May 2014 16:17:22 -0500 Thread-Topic: [PATCH] malloc: fix rte_free run time in O(n) free blocks Thread-Index: Ac9rAuX4OMckLJadQACLsRZx2PdsJw== Message-ID: References: In-Reply-To: Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: user-agent: Microsoft-MacOutlook/14.4.1.140326 acceptlanguage: en-US Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH] malloc: fix rte_free run time in O(n) free blocks X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 May 2014 21:17:16 -0000 I haven't heard anything regarding this patch. In general, how does your team keep track of outstanding patch requests or RFCs? Thanks, Robert >Here is our proposed patch: > >Lib rte_malloc stores free blocks in singly-linked lists. >This results in O(n), i.e., poor performance when freeing memory >to a heap that contains thousands of free blocks. > >A simple solution is to store free blocks on doubly-linked lists. >- Space-wise, we add one new pointer to struct malloc_elem. > This does not affect its size, since it is cache-aligned and > currently has room for another pointer. >- We perform all free list adds and deletes in functions > malloc_elem_add_to_free_list and remove_from_free_list. >- We clean up internal malloc functions that push or > return pointers to previous blocks in the free list. > > >--- > lib/librte_malloc/malloc_elem.c | 82 >+++++++++++++++++++++++--------------- > lib/librte_malloc/malloc_elem.h | 6 ++- > lib/librte_malloc/malloc_heap.c | 19 +++------ > 3 files changed, 60 insertions(+), 47 deletions(-) > >diff --git a/lib/librte_malloc/malloc_elem.c >b/lib/librte_malloc/malloc_elem.c >index f0da640..9a14e23 100644 >--- a/lib/librte_malloc/malloc_elem.c >+++ b/lib/librte_malloc/malloc_elem.c >@@ -60,7 +60,7 @@ malloc_elem_init(struct malloc_elem *elem, > { > elem->heap =3D heap; > elem->mz =3D mz; >- elem->prev =3D elem->next_free =3D NULL; >+ elem->prev =3D elem->next_free =3D elem->prev_free =3D NULL; > elem->state =3D ELEM_FREE; > elem->size =3D size; > elem->pad =3D 0; >@@ -125,14 +125,58 @@ split_elem(struct malloc_elem *elem, struct >malloc_elem *split_pt) > } >=20 > /* >+ * Add the specified element to its heap's free list. >+ */ >+void >+malloc_elem_add_to_free_list(struct malloc_elem *elem) >+{ >+ elem->state =3D ELEM_FREE; >+ elem->prev_free =3D NULL; >+ if (!!(elem->next_free =3D elem->heap->free_head)) >+ elem->next_free->prev_free =3D elem; >+ elem->heap->free_head =3D elem; >+} >+ >+/* >+ * Remove the specified element from its heap's free list. >+ */ >+static inline void >+remove_from_free_list(struct malloc_elem *elem) >+{ >+ struct malloc_elem *next_free_elem =3D elem->next_free; >+ struct malloc_elem *prev_free_elem =3D elem->prev_free; >+ >+ if (!!next_free_elem) { >+#ifdef RTE_LIBRTE_MALLOC_DEBUG >+ RTE_VERIFY(next_free_elem->prev_free =3D=3D elem); >+#endif >+ next_free_elem->prev_free =3D prev_free_elem; >+ } >+ >+ if (!!prev_free_elem) { >+#ifdef RTE_LIBRTE_MALLOC_DEBUG >+ RTE_VERIFY(prev_free_elem->next_free =3D=3D elem); >+#endif >+ prev_free_elem->next_free =3D next_free_elem; >+ } >+ else { >+ struct malloc_heap *heap =3D elem->heap; >+ >+#ifdef RTE_LIBRTE_MALLOC_DEBUG >+ RTE_VERIFY(heap->free_head =3D=3D elem); >+#endif >+ heap->free_head =3D next_free_elem; >+ } >+} >+ >+/* > * reserve a block of data in an existing malloc_elem. If the malloc_elem > * is much larger than the data block requested, we split the element in >two. > * This function is only called from malloc_heap_alloc so parameter >checking > * is not done here, as it's done there previously. > */ > struct malloc_elem * >-malloc_elem_alloc(struct malloc_elem *elem, size_t size, >- unsigned align, struct malloc_elem *prev_free) >+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align) > { > struct malloc_elem *new_elem =3D elem_start_pt(elem, size, align); > const unsigned old_elem_size =3D (uintptr_t)new_elem - (uintptr_t)elem; >@@ -151,10 +195,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t >size, > set_header(new_elem); > } > /* remove element from free list */ >- if (prev_free =3D=3D NULL) >- elem->heap->free_head =3D elem->next_free; >- else >- prev_free->next_free =3D elem->next_free; >+ remove_from_free_list(elem); >=20 > return new_elem; > } >@@ -182,25 +223,6 @@ join_elem(struct malloc_elem *elem1, struct >malloc_elem *elem2) > } >=20 > /* >- * scan the free list, and remove the request element from that >- * free list. (Free list to scan is got from heap pointer in element) >- */ >-static inline void >-remove_from_free_list(struct malloc_elem *elem) >-{ >- if (elem =3D=3D elem->heap->free_head) >- elem->heap->free_head =3D elem->next_free; >- else{ >- struct malloc_elem *prev_free =3D elem->heap->free_head; >- while (prev_free && prev_free->next_free !=3D elem) >- prev_free =3D prev_free->next_free; >- if (!prev_free) >- rte_panic("Corrupted free list\n"); >- prev_free->next_free =3D elem->next_free; >- } >-} >- >-/* > * free a malloc_elem block by adding it to the free list. If the > * blocks either immediately before or immediately after newly freed >block > * are also free, the blocks are merged together. >@@ -226,9 +248,7 @@ malloc_elem_free(struct malloc_elem *elem) > join_elem(elem->prev, elem); > /* otherwise add ourselves to the free list */ > else { >- elem->next_free =3D elem->heap->free_head; >- elem->heap->free_head =3D elem; >- elem->state =3D ELEM_FREE; >+ malloc_elem_add_to_free_list(elem); > elem->pad =3D 0; > } > /* decrease heap's count of allocated elements */ >@@ -269,9 +289,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t >size) > struct malloc_elem *split_pt =3D RTE_PTR_ADD(elem, new_size); > split_pt =3D RTE_PTR_ALIGN_CEIL(split_pt, CACHE_LINE_SIZE); > split_elem(elem, split_pt); >- split_pt->state =3D ELEM_FREE; >- split_pt->next_free =3D elem->heap->free_head; >- elem->heap->free_head =3D split_pt; >+ malloc_elem_add_to_free_list(split_pt); > } > rte_spinlock_unlock(&elem->heap->lock); > return 0; >diff --git a/lib/librte_malloc/malloc_elem.h >b/lib/librte_malloc/malloc_elem.h >index eadecf9..6d2d7db 100644 >--- a/lib/librte_malloc/malloc_elem.h >+++ b/lib/librte_malloc/malloc_elem.h >@@ -47,6 +47,7 @@ struct malloc_elem { > struct malloc_heap *heap; > struct malloc_elem *volatile prev; /* points to prev elem in >memzone */ > struct malloc_elem *volatile next_free; /* to make list of free elements >*/ >+ struct malloc_elem *volatile prev_free; /* make free list doubly-linked >*/ > const struct rte_memzone *mz; > volatile enum elem_state state; > uint32_t pad; >@@ -156,8 +157,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t >size, unsigned align); > * is much larger than the data block requested, we split the element in >two. > */ > struct malloc_elem * >-malloc_elem_alloc(struct malloc_elem *elem, size_t size, >- unsigned align, struct malloc_elem *prev_free); >+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align); >=20 > /* > * free a malloc_elem block by adding it to the free list. If the >@@ -174,4 +174,6 @@ malloc_elem_free(struct malloc_elem *elem); > int > malloc_elem_resize(struct malloc_elem *elem, size_t size); >=20 >+void malloc_elem_add_to_free_list(struct malloc_elem *elem); >+ > #endif /* MALLOC_ELEM_H_ */ >diff --git a/lib/librte_malloc/malloc_heap.c >b/lib/librte_malloc/malloc_heap.c >index f4a0294..bc146e1 100644 >--- a/lib/librte_malloc/malloc_heap.c >+++ b/lib/librte_malloc/malloc_heap.c >@@ -112,9 +112,8 @@ malloc_heap_add_memzone(struct malloc_heap *heap, >size_t size, unsigned align) > const unsigned elem_size =3D (uintptr_t)end_elem - (uintptr_t)start_elem= ; > malloc_elem_init(start_elem, heap, mz, elem_size); > malloc_elem_mkend(end_elem, start_elem); >+ malloc_elem_add_to_free_list(start_elem); >=20 >- start_elem->next_free =3D heap->free_head; >- heap->free_head =3D start_elem; > /* increase heap total size by size of new memzone */ > heap->total_size+=3Dmz_size - MALLOC_ELEM_OVERHEAD; > return 0; >@@ -160,27 +159,22 @@ malloc_heap_init(struct malloc_heap *heap) > * (to make removing the element from the free list faster). > */ > static struct malloc_elem * >-find_suitable_element(struct malloc_heap *heap, size_t size, >- unsigned align, struct malloc_elem **prev) >+find_suitable_element(struct malloc_heap *heap, size_t size, unsigned >align) > { >- struct malloc_elem *elem, *min_elem, *min_prev; >+ struct malloc_elem *elem, *min_elem; > size_t min_sz; >=20 > elem =3D heap->free_head; > min_elem =3D NULL; >- min_prev =3D NULL; > min_sz =3D (size_t) SIZE_MAX; >- *prev =3D NULL; >=20 > while(elem){ > if (malloc_elem_can_hold(elem, size, align)) { > if (min_sz > elem->size) { > min_elem =3D elem; >- *prev =3D min_prev; > min_sz =3D elem->size; > } > } >- min_prev =3D elem; > elem =3D elem->next_free; > } > return (min_elem); >@@ -202,15 +196,14 @@ malloc_heap_alloc(struct malloc_heap *heap, > size =3D CACHE_LINE_ROUNDUP(size); > align =3D CACHE_LINE_ROUNDUP(align); > rte_spinlock_lock(&heap->lock); >- struct malloc_elem *prev, *elem =3D find_suitable_element(heap, >- size, align, &prev); >+ struct malloc_elem *elem =3D find_suitable_element(heap, size, align); > if (elem =3D=3D NULL){ > if ((malloc_heap_add_memzone(heap, size, align)) =3D=3D 0) >- elem =3D find_suitable_element(heap, size, align, &prev); >+ elem =3D find_suitable_element(heap, size, align); > } >=20 > if (elem !=3D NULL){ >- elem =3D malloc_elem_alloc(elem, size, align, prev); >+ elem =3D malloc_elem_alloc(elem, size, align); > /* increase heap's count of allocated elements */ > heap->alloc_count++; > } >--=20 >1.7.1 > >Signed-off-by: Robert Sanford >