DPDK patches and discussions
 help / color / mirror / Atom feed
From: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
To: <dev@dpdk.org>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Subject: [PATCH v1 3/6] mem: add dirty malloc element support
Date: Mon, 17 Jan 2022 10:07:58 +0200	[thread overview]
Message-ID: <20220117080801.481568-4-dkozlyuk@nvidia.com> (raw)
In-Reply-To: <20220117080801.481568-1-dkozlyuk@nvidia.com>

EAL malloc layer assumed all free elements content
is filled with zeros ("clean"), as opposed to uninitialized ("dirty").
This assumption was ensured in two ways:
1. EAL memalloc layer always returned clean memory.
2. Freed memory was cleared before returning into the heap.

Clearing the memory can be as slow as around 14 GiB/s.
To save doing so, memalloc layer is allowed to return dirty memory.
Such segments being marked with RTE_MEMSEG_FLAG_DIRTY.
The allocator tracks elements that contain dirty memory
using the new flag in the element header.
When clean memory is requested via rte_zmalloc*()
and the suitable element is dirty, it is cleared on allocation.
When memory is deallocated, the freed element is joined
with adjacent free elements, and the dirty flag is updated:

    dirty + freed + dirty = dirty  =>  no need to clean
            freed + dirty = dirty      the freed memory

    clean + freed + clean = clean  =>  freed memory
    clean + freed         = clean      must be cleared
            freed + clean = clean
            freed         = clean

As a result, memory is either cleared on free, as before,
or it will be cleared on allocation if need be, but never twice.

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
---
 lib/eal/common/malloc_elem.c | 22 +++++++++++++++++++---
 lib/eal/common/malloc_elem.h | 11 +++++++++--
 lib/eal/common/malloc_heap.c | 18 ++++++++++++------
 lib/eal/common/rte_malloc.c  | 21 ++++++++++++++-------
 lib/eal/include/rte_memory.h |  8 ++++++--
 5 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
index bdd20a162e..e04e0890fb 100644
--- a/lib/eal/common/malloc_elem.c
+++ b/lib/eal/common/malloc_elem.c
@@ -129,7 +129,7 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
 void
 malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
 		struct rte_memseg_list *msl, size_t size,
-		struct malloc_elem *orig_elem, size_t orig_size)
+		struct malloc_elem *orig_elem, size_t orig_size, bool dirty)
 {
 	elem->heap = heap;
 	elem->msl = msl;
@@ -137,6 +137,7 @@ malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
 	elem->next = NULL;
 	memset(&elem->free_list, 0, sizeof(elem->free_list));
 	elem->state = ELEM_FREE;
+	elem->dirty = dirty;
 	elem->size = size;
 	elem->pad = 0;
 	elem->orig_elem = orig_elem;
@@ -300,7 +301,7 @@ split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
 	const size_t new_elem_size = elem->size - old_elem_size;
 
 	malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
-			 elem->orig_elem, elem->orig_size);
+			elem->orig_elem, elem->orig_size, elem->dirty);
 	split_pt->prev = elem;
 	split_pt->next = next_elem;
 	if (next_elem)
@@ -506,6 +507,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 	else
 		elem1->heap->last = elem1;
 	elem1->next = next;
+	elem1->dirty |= elem2->dirty;
 	if (elem1->pad) {
 		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
 		inner->size = elem1->size - elem1->pad;
@@ -579,6 +581,14 @@ malloc_elem_free(struct malloc_elem *elem)
 	ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
 	data_len = elem->size - MALLOC_ELEM_OVERHEAD;
 
+	/*
+	 * Consider the element clean for the purposes of joining.
+	 * If both neighbors are clean or non-existent,
+	 * the joint element will be clean,
+	 * which means the memory should be cleared.
+	 * There is no need to clear the memory if the joint element is dirty.
+	 */
+	elem->dirty = false;
 	elem = malloc_elem_join_adjacent_free(elem);
 
 	malloc_elem_free_list_insert(elem);
@@ -588,8 +598,14 @@ malloc_elem_free(struct malloc_elem *elem)
 	/* decrease heap's count of allocated elements */
 	elem->heap->alloc_count--;
 
-	/* poison memory */
+#ifndef RTE_MALLOC_DEBUG
+	/* Normally clear the memory when needed. */
+	if (!elem->dirty)
+		memset(ptr, 0, data_len);
+#else
+	/* Always poison the memory in debug mode. */
 	memset(ptr, MALLOC_POISON, data_len);
+#endif
 
 	return elem;
 }
diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
index 15d8ba7af2..f2aa98821b 100644
--- a/lib/eal/common/malloc_elem.h
+++ b/lib/eal/common/malloc_elem.h
@@ -27,7 +27,13 @@ struct malloc_elem {
 	LIST_ENTRY(malloc_elem) free_list;
 	/**< list of free elements in heap */
 	struct rte_memseg_list *msl;
-	volatile enum elem_state state;
+	/** Element state, @c dirty and @c pad validity depends on it. */
+	/* An extra bit is needed to represent enum elem_state as signed int. */
+	enum elem_state state : 3;
+	/** If state == ELEM_FREE: the memory is not filled with zeroes. */
+	uint32_t dirty : 1;
+	/** Reserved for future use. */
+	uint32_t reserved : 28;
 	uint32_t pad;
 	size_t size;
 	struct malloc_elem *orig_elem;
@@ -320,7 +326,8 @@ malloc_elem_init(struct malloc_elem *elem,
 		struct rte_memseg_list *msl,
 		size_t size,
 		struct malloc_elem *orig_elem,
-		size_t orig_size);
+		size_t orig_size,
+		bool dirty);
 
 void
 malloc_elem_insert(struct malloc_elem *elem);
diff --git a/lib/eal/common/malloc_heap.c b/lib/eal/common/malloc_heap.c
index 55aad2711b..24080fc473 100644
--- a/lib/eal/common/malloc_heap.c
+++ b/lib/eal/common/malloc_heap.c
@@ -93,11 +93,11 @@ malloc_socket_to_heap_id(unsigned int socket_id)
  */
 static struct malloc_elem *
 malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
-		void *start, size_t len)
+		void *start, size_t len, bool dirty)
 {
 	struct malloc_elem *elem = start;
 
-	malloc_elem_init(elem, heap, msl, len, elem, len);
+	malloc_elem_init(elem, heap, msl, len, elem, len, dirty);
 
 	malloc_elem_insert(elem);
 
@@ -135,7 +135,8 @@ malloc_add_seg(const struct rte_memseg_list *msl,
 
 	found_msl = &mcfg->memsegs[msl_idx];
 
-	malloc_heap_add_memory(heap, found_msl, ms->addr, len);
+	malloc_heap_add_memory(heap, found_msl, ms->addr, len,
+			ms->flags & RTE_MEMSEG_FLAG_DIRTY);
 
 	heap->total_size += len;
 
@@ -303,7 +304,8 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
 	struct rte_memseg_list *msl;
 	struct malloc_elem *elem = NULL;
 	size_t alloc_sz;
-	int allocd_pages;
+	int allocd_pages, i;
+	bool dirty = false;
 	void *ret, *map_addr;
 
 	alloc_sz = (size_t)pg_sz * n_segs;
@@ -372,8 +374,12 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
 		goto fail;
 	}
 
+	/* Element is dirty if it contains at least one dirty page. */
+	for (i = 0; i < allocd_pages; i++)
+		dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY;
+
 	/* add newly minted memsegs to malloc heap */
-	elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
+	elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty);
 
 	/* try once more, as now we have allocated new memory */
 	ret = find_suitable_element(heap, elt_size, flags, align, bound,
@@ -1260,7 +1266,7 @@ malloc_heap_add_external_memory(struct malloc_heap *heap,
 	memset(msl->base_va, 0, msl->len);
 
 	/* now, add newly minted memory to the malloc heap */
-	malloc_heap_add_memory(heap, msl, msl->base_va, msl->len);
+	malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false);
 
 	heap->total_size += msl->len;
 
diff --git a/lib/eal/common/rte_malloc.c b/lib/eal/common/rte_malloc.c
index d0bec26920..71a3f7ecb4 100644
--- a/lib/eal/common/rte_malloc.c
+++ b/lib/eal/common/rte_malloc.c
@@ -115,15 +115,22 @@ rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket)
 {
 	void *ptr = rte_malloc_socket(type, size, align, socket);
 
+	if (ptr != NULL) {
+		struct malloc_elem *elem = malloc_elem_from_data(ptr);
+
+		if (elem->dirty) {
+			memset(ptr, 0, size);
+		} else {
 #ifdef RTE_MALLOC_DEBUG
-	/*
-	 * If DEBUG is enabled, then freed memory is marked with poison
-	 * value and set to zero on allocation.
-	 * If DEBUG is not enabled then  memory is already zeroed.
-	 */
-	if (ptr != NULL)
-		memset(ptr, 0, size);
+			/*
+			 * If DEBUG is enabled, then freed memory is marked
+			 * with a poison value and set to zero on allocation.
+			 * If DEBUG is disabled then memory is already zeroed.
+			 */
+			memset(ptr, 0, size);
 #endif
+		}
+	}
 
 	rte_eal_trace_mem_zmalloc(type, size, align, socket, ptr);
 	return ptr;
diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h
index 6d018629ae..68b069fd04 100644
--- a/lib/eal/include/rte_memory.h
+++ b/lib/eal/include/rte_memory.h
@@ -19,6 +19,7 @@
 extern "C" {
 #endif
 
+#include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_compat.h>
 #include <rte_config.h>
@@ -37,11 +38,14 @@ extern "C" {
 
 #define SOCKET_ID_ANY -1                    /**< Any NUMA socket. */
 
+/** Prevent this segment from being freed back to the OS. */
+#define RTE_MEMSEG_FLAG_DO_NOT_FREE RTE_BIT32(0)
+/** This segment is not filled with zeros. */
+#define RTE_MEMSEG_FLAG_DIRTY RTE_BIT32(1)
+
 /**
  * Physical memory segment descriptor.
  */
-#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
-/**< Prevent this segment from being freed back to the OS. */
 struct rte_memseg {
 	rte_iova_t iova;            /**< Start IO address. */
 	RTE_STD_C11
-- 
2.25.1


  parent reply	other threads:[~2022-01-17  8:08 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-30 14:37 [RFC PATCH 0/6] Fast restart with many hugepages Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 2/6] mem: add dirty malloc element support Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 3/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 4/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2021-12-30 14:48 ` [RFC PATCH 5/6] eal: allow hugepage file reuse with --huge-unlink Dmitry Kozlyuk
2021-12-30 14:49 ` [RFC PATCH 6/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-01-17  8:07 ` [PATCH v1 0/6] Fast restart with many hugepages Dmitry Kozlyuk
2022-01-17  8:07   ` [PATCH v1 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2022-01-17  9:20     ` Thomas Monjalon
2022-01-17  8:07   ` [PATCH v1 2/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-01-17 15:47     ` Bruce Richardson
2022-01-17 15:51       ` Bruce Richardson
2022-01-19 21:12         ` Dmitry Kozlyuk
2022-01-20  9:04           ` Bruce Richardson
2022-01-17 16:06     ` Aaron Conole
2022-01-17  8:07   ` Dmitry Kozlyuk [this message]
2022-01-17 14:07     ` [PATCH v1 3/6] mem: add dirty malloc element support Thomas Monjalon
2022-01-17  8:07   ` [PATCH v1 4/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2022-01-17 14:10     ` Thomas Monjalon
2022-01-17  8:14   ` [PATCH v1 5/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2022-01-17 14:24     ` Thomas Monjalon
2022-01-17  8:14   ` [PATCH v1 6/6] eal: extend --huge-unlink for " Dmitry Kozlyuk
2022-01-17 14:27     ` Thomas Monjalon
2022-01-17 16:40   ` [PATCH v1 0/6] Fast restart with many hugepages Bruce Richardson
2022-01-19 21:12     ` Dmitry Kozlyuk
2022-01-20  9:05       ` Bruce Richardson
2022-01-19 21:09   ` [PATCH v2 " Dmitry Kozlyuk
2022-01-19 21:09     ` [PATCH v2 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2022-01-27 13:59       ` Bruce Richardson
2022-01-19 21:09     ` [PATCH v2 2/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-01-19 21:09     ` [PATCH v2 3/6] mem: add dirty malloc element support Dmitry Kozlyuk
2022-01-19 21:09     ` [PATCH v2 4/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2022-01-19 21:11     ` [PATCH v2 5/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2022-01-19 21:11       ` [PATCH v2 6/6] eal: extend --huge-unlink for " Dmitry Kozlyuk
2022-01-27 12:07     ` [PATCH v2 0/6] Fast restart with many hugepages Bruce Richardson
2022-02-02 14:12     ` Thomas Monjalon
2022-02-02 21:54     ` David Marchand
2022-02-03 10:26       ` David Marchand
2022-02-03 18:13     ` [PATCH v3 " Dmitry Kozlyuk
2022-02-03 18:13       ` [PATCH v3 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2022-02-08 15:28         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 2/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-02-08 16:20         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 3/6] mem: add dirty malloc element support Dmitry Kozlyuk
2022-02-08 16:36         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 4/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2022-02-08 16:39         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 5/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2022-02-08 17:05         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 6/6] eal: extend --huge-unlink for " Dmitry Kozlyuk
2022-02-08 17:14         ` Burakov, Anatoly
2022-02-08 20:40       ` [PATCH v3 0/6] Fast restart with many hugepages David Marchand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220117080801.481568-4-dkozlyuk@nvidia.com \
    --to=dkozlyuk@nvidia.com \
    --cc=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).