From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <aburakov@ecsmtp.ir.intel.com>
Received: from mga12.intel.com (mga12.intel.com [192.55.52.136])
 by dpdk.org (Postfix) with ESMTP id D1EED1E35
 for <dev@dpdk.org>; Wed,  2 May 2018 17:38:24 +0200 (CEST)
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from orsmga006.jf.intel.com ([10.7.209.51])
 by fmsmga106.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;
 02 May 2018 08:38:18 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.49,354,1520924400"; d="scan'208";a="38717756"
Received: from irvmail001.ir.intel.com ([163.33.26.43])
 by orsmga006.jf.intel.com with ESMTP; 02 May 2018 08:38:17 -0700
Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com
 [10.237.217.45])
 by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id
 w42FcGK1010671; Wed, 2 May 2018 16:38:16 +0100
Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1])
 by sivswdev01.ir.intel.com with ESMTP id w42FcGuS006405;
 Wed, 2 May 2018 16:38:16 +0100
Received: (from aburakov@localhost)
 by sivswdev01.ir.intel.com with LOCAL id w42FcGvf006399;
 Wed, 2 May 2018 16:38:16 +0100
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: shahafs@mellanox.com, thomasm@mellanox.com, olgas@mellanox.com,
 rasland@mellanox.com, anatoly.burakov@intel.com
Date: Wed,  2 May 2018 16:38:16 +0100
Message-Id: <454d2b8eea6615a20e496fa924cc04feda9bb686.1525274971.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 1.7.0.7
In-Reply-To: <d364f98742dd9d18e25a2280dadfff08a02d9a2e.1525274971.git.anatoly.burakov@intel.com>
References: <d364f98742dd9d18e25a2280dadfff08a02d9a2e.1525274971.git.anatoly.burakov@intel.com>
In-Reply-To: <d364f98742dd9d18e25a2280dadfff08a02d9a2e.1525274971.git.anatoly.burakov@intel.com>
References: <d364f98742dd9d18e25a2280dadfff08a02d9a2e.1525274971.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [PATCH 2/2] malloc: avoid padding elements on page
	deallocation
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Wed, 02 May 2018 15:38:25 -0000

Currently, when deallocating pages, malloc will fixup other
elements' headers if there is not enough space to store a full
element in leftover space. This leads to race conditions because
there are some functions that check for pad size with an unlocked
heap, expecting pad size to be constant.

Fix it by being more conservative and only freeing pages when
there is enough space before and after the page to store a free
element.

Fixes: 1403f87d4fb8 ("malloc: enable memory hotplug support")
Cc: anatoly.burakov@intel.com

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/malloc_elem.c | 50 ++-----------------------------------
 lib/librte_eal/common/malloc_elem.h |  2 ++
 lib/librte_eal/common/malloc_heap.c | 38 +++++++++++++++++++++++++++-
 3 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 0a86d34..46226ca 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -22,7 +22,6 @@
 #include "malloc_elem.h"
 #include "malloc_heap.h"
 
-#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
 
 /*
  * Initialize a general malloc_elem header structure
@@ -476,27 +475,6 @@ malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
 			split_elem(elem, hide_end);
 
 			malloc_elem_free_list_insert(hide_end);
-		} else if (len_after >= MALLOC_ELEM_HEADER_LEN) {
-			/* shrink current element */
-			elem->size -= len_after;
-			memset(hide_end, 0, sizeof(*hide_end));
-
-			/* copy next element's data to our pad */
-			memcpy(hide_end, next, sizeof(*hide_end));
-
-			/* pad next element */
-			next->state = ELEM_PAD;
-			next->pad = len_after;
-			next->size -= len_after;
-
-			/* next element busy, would've been merged otherwise */
-			hide_end->pad = len_after;
-			hide_end->size += len_after;
-
-			/* adjust pointers to point to our new pad */
-			if (next->next)
-				next->next->prev = hide_end;
-			elem->next = hide_end;
 		} else if (len_after > 0) {
 			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
 			return;
@@ -515,32 +493,8 @@ malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
 
 			malloc_elem_free_list_insert(prev);
 		} else if (len_before > 0) {
-			/*
-			 * unlike with elements after current, here we don't
-			 * need to pad elements, but rather just increase the
-			 * size of previous element, copy the old header and set
-			 * up trailer.
-			 */
-			void *trailer = RTE_PTR_ADD(prev,
-					prev->size - MALLOC_ELEM_TRAILER_LEN);
-
-			memcpy(hide_start, elem, sizeof(*elem));
-			hide_start->size = len;
-
-			prev->size += len_before;
-			set_trailer(prev);
-
-			/* update pointers */
-			prev->next = hide_start;
-			if (next)
-				next->prev = hide_start;
-
-			/* erase old trailer */
-			memset(trailer, 0, MALLOC_ELEM_TRAILER_LEN);
-			/* erase old header */
-			memset(elem, 0, sizeof(*elem));
-
-			elem = hide_start;
+			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+			return;
 		}
 	}
 
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 8f4aef8..7331af9 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -9,6 +9,8 @@
 
 #include <rte_eal_memconfig.h>
 
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+
 /* dummy definition of struct so we can use pointers to it in malloc_elem struct */
 struct malloc_heap;
 
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 633e306..28c137a 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -609,7 +609,7 @@ malloc_heap_free(struct malloc_elem *elem)
 	void *start, *aligned_start, *end, *aligned_end;
 	size_t len, aligned_len, page_sz;
 	struct rte_memseg_list *msl;
-	unsigned int i, n_segs;
+	unsigned int i, n_segs, before_space, after_space;
 	int ret;
 
 	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
@@ -673,6 +673,42 @@ malloc_heap_free(struct malloc_elem *elem)
 	if (n_segs == 0)
 		goto free_unlock;
 
+	/* we're not done yet. we also have to check if by freeing space we will
+	 * be leaving free elements that are too small to store new elements.
+	 * check if we have enough space in the beginning and at the end, or if
+	 * start/end are exactly page aligned.
+	 */
+	before_space = RTE_PTR_DIFF(aligned_start, elem);
+	after_space = RTE_PTR_DIFF(end, aligned_end);
+	if (before_space != 0 &&
+			before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+		/* there is not enough space before start, but we may be able to
+		 * move the start forward by one page.
+		 */
+		if (n_segs == 1)
+			goto free_unlock;
+
+		/* move start */
+		aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
+		aligned_len -= page_sz;
+		n_segs--;
+	}
+	if (after_space != 0 && after_space <
+			MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+		/* there is not enough space after end, but we may be able to
+		 * move the end backwards by one page.
+		 */
+		if (n_segs == 1)
+			goto free_unlock;
+
+		/* move end */
+		aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
+		aligned_len -= page_sz;
+		n_segs--;
+	}
+
+	/* now we can finally free us some pages */
+
 	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
 
 	/*
-- 
2.7.4