DPDK patches and discussions
 help / color / mirror / Atom feed
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com,
	keith.wiles@intel.com, benjamin.walker@intel.com,
	bruce.richardson@intel.com, thomas@monjalon.net
Subject: [dpdk-dev] [RFC v2 20/23] eal: make memzones use rte_fbarray
Date: Tue, 19 Dec 2017 11:14:47 +0000	[thread overview]
Message-ID: <69a29e4ac2822d0c4b1f6c599b428977b2b25505.1513681966.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1513681966.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1513681966.git.anatoly.burakov@intel.com>

We greatly expand memzone list, and it makes some operations faster.
Plus, it's there, so we might as well use it.

As part of this commit, a potential memory leak is fixed (when we
allocate a memzone but there's no room in config, we don't free it
back), and there's a compile fix for ENA driver.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 config/common_base                                |   2 +-
 drivers/net/ena/ena_ethdev.c                      |  10 +-
 lib/librte_eal/common/eal_common_memzone.c        | 168 ++++++++++++++++------
 lib/librte_eal/common/include/rte_eal_memconfig.h |   4 +-
 4 files changed, 137 insertions(+), 47 deletions(-)

diff --git a/config/common_base b/config/common_base
index 9730d4c..cce464d 100644
--- a/config/common_base
+++ b/config/common_base
@@ -92,7 +92,7 @@ CONFIG_RTE_MAX_LCORE=128
 CONFIG_RTE_MAX_NUMA_NODES=8
 CONFIG_RTE_MAX_MEMSEG_LISTS=16
 CONFIG_RTE_MAX_MEMSEG_PER_LIST=32768
-CONFIG_RTE_MAX_MEMZONE=2560
+CONFIG_RTE_MAX_MEMZONE=32768
 CONFIG_RTE_MAX_TAILQ=32
 CONFIG_RTE_ENABLE_ASSERT=n
 CONFIG_RTE_LOG_LEVEL=RTE_LOG_INFO
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 22db895..aa37cad 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -249,11 +249,15 @@ static const struct eth_dev_ops ena_dev_ops = {
 static inline int ena_cpu_to_node(int cpu)
 {
 	struct rte_config *config = rte_eal_get_configuration();
+	const struct rte_fbarray *arr = &config->mem_config->memzones;
+	const struct rte_memzone *mz;
 
-	if (likely(cpu < RTE_MAX_MEMZONE))
-		return config->mem_config->memzone[cpu].socket_id;
+	if (unlikely(cpu >= RTE_MAX_MEMZONE))
+		return NUMA_NO_NODE;
 
-	return NUMA_NO_NODE;
+	mz = rte_fbarray_get(arr, cpu);
+
+	return mz->socket_id;
 }
 
 static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf,
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index a9a4bef..58a4f25 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -58,20 +58,23 @@ static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
 	const struct rte_mem_config *mcfg;
+	const struct rte_fbarray *arr;
 	const struct rte_memzone *mz;
-	unsigned i = 0;
+	int i = 0;
 
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
+	arr = &mcfg->memzones;
 
 	/*
 	 * the algorithm is not optimal (linear), but there are few
 	 * zones and this function should be called at init only
 	 */
-	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
-		mz = &mcfg->memzone[i];
-		if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
-			return &mcfg->memzone[i];
+	while ((i = rte_fbarray_find_next_used(arr, i)) >= 0) {
+		mz = rte_fbarray_get(arr, i++);
+		if (mz->addr != NULL &&
+				!strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+			return mz;
 	}
 
 	return NULL;
@@ -81,17 +84,44 @@ static inline struct rte_memzone *
 get_next_free_memzone(void)
 {
 	struct rte_mem_config *mcfg;
-	unsigned i = 0;
+	struct rte_fbarray *arr;
+	int i = 0;
 
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
+	arr = &mcfg->memzones;
+
+	i = rte_fbarray_find_next_free(arr, 0);
+	if (i < 0) {
+		/* no space in config, so try expanding the list */
+		int old_len = arr->len;
+		int new_len = old_len * 2;
+		new_len = RTE_MIN(new_len, arr->capacity);
+
+		if (old_len == new_len) {
+			/* can't expand, the list is full */
+			RTE_LOG(ERR, EAL, "%s(): no space in memzone list\n",
+				__func__);
+			return NULL;
+		}
 
-	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
-		if (mcfg->memzone[i].addr == NULL)
-			return &mcfg->memzone[i];
-	}
+		if (rte_fbarray_resize(arr, new_len)) {
+			RTE_LOG(ERR, EAL, "%s(): can't resize memzone list\n",
+				__func__);
+			return NULL;
+		}
 
-	return NULL;
+		/* ensure we have free space */
+		i = rte_fbarray_find_next_free(arr, old_len);
+
+		if (i < 0) {
+			RTE_LOG(ERR, EAL, "%s(): Cannot find room in config!\n",
+				__func__);
+			return NULL;
+		}
+	}
+	rte_fbarray_set_used(arr, i, true);
+	return rte_fbarray_get(arr, i);
 }
 
 /* This function will return the greatest free block if a heap has been
@@ -132,14 +162,16 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 {
 	struct rte_memzone *mz;
 	struct rte_mem_config *mcfg;
+	struct rte_fbarray *arr;
 	size_t requested_len;
 	int socket;
 
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
+	arr = &mcfg->memzones;
 
 	/* no more room in config */
-	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
+	if (arr->count >= arr->capacity) {
 		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
 		rte_errno = ENOSPC;
 		return NULL;
@@ -231,19 +263,19 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 		return NULL;
 	}
 
-	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
 
 	/* fill the zone in config */
 	mz = get_next_free_memzone();
 
 	if (mz == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
-				"in config!\n", __func__);
+		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room in config!\n",
+			__func__);
 		rte_errno = ENOSPC;
+		malloc_heap_free(elem);
 		return NULL;
 	}
 
-	mcfg->memzone_cnt++;
 	snprintf(mz->name, sizeof(mz->name), "%s", name);
 	mz->iova = rte_malloc_virt2iova(mz_addr);
 	mz->addr = mz_addr;
@@ -356,6 +388,8 @@ int
 rte_memzone_free(const struct rte_memzone *mz)
 {
 	struct rte_mem_config *mcfg;
+	struct rte_fbarray *arr;
+	struct rte_memzone *found_mz;
 	int ret = 0;
 	void *addr;
 	unsigned idx;
@@ -364,21 +398,22 @@ rte_memzone_free(const struct rte_memzone *mz)
 		return -EINVAL;
 
 	mcfg = rte_eal_get_configuration()->mem_config;
+	arr = &mcfg->memzones;
 
 	rte_rwlock_write_lock(&mcfg->mlock);
 
-	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
-	idx = idx / sizeof(struct rte_memzone);
+	idx = rte_fbarray_find_idx(arr, mz);
+	found_mz = rte_fbarray_get(arr, idx);
 
-	addr = mcfg->memzone[idx].addr;
+	addr = found_mz->addr;
 	if (addr == NULL)
 		ret = -EINVAL;
-	else if (mcfg->memzone_cnt == 0) {
+	else if (arr->count == 0) {
 		rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n",
 				__func__);
 	} else {
-		memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
-		mcfg->memzone_cnt--;
+		memset(found_mz, 0, sizeof(*found_mz));
+		rte_fbarray_set_used(arr, idx, false);
 	}
 
 	rte_rwlock_write_unlock(&mcfg->mlock);
@@ -412,25 +447,71 @@ rte_memzone_lookup(const char *name)
 void
 rte_memzone_dump(FILE *f)
 {
+	struct rte_fbarray *arr;
 	struct rte_mem_config *mcfg;
-	unsigned i = 0;
+	int i = 0;
 
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
+	arr = &mcfg->memzones;
 
 	rte_rwlock_read_lock(&mcfg->mlock);
 	/* dump all zones */
-	for (i=0; i<RTE_MAX_MEMZONE; i++) {
-		if (mcfg->memzone[i].addr == NULL)
-			break;
-		fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx"
+	while ((i = rte_fbarray_find_next_used(arr, i)) >= 0) {
+		void *cur_addr, *mz_end;
+		struct rte_memzone *mz;
+		struct rte_memseg_list *msl = NULL;
+		struct rte_memseg *ms;
+		int msl_idx, ms_idx;
+
+		mz = rte_fbarray_get(arr, i);
+
+		/*
+		 * memzones can span multiple physical pages, so dump addresses
+		 * of all physical pages this memzone spans.
+		 */
+
+		fprintf(f, "Zone %u: name:<%s>, len:0x%zx"
 		       ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
-		       mcfg->memzone[i].name,
-		       mcfg->memzone[i].iova,
-		       mcfg->memzone[i].len,
-		       mcfg->memzone[i].addr,
-		       mcfg->memzone[i].socket_id,
-		       mcfg->memzone[i].flags);
+		       mz->name,
+		       mz->len,
+		       mz->addr,
+		       mz->socket_id,
+		       mz->flags);
+
+		/* get pointer to appropriate memseg list */
+		for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+			if (mcfg->memsegs[msl_idx].hugepage_sz != mz->hugepage_sz)
+				continue;
+			if (mcfg->memsegs[msl_idx].socket_id != mz->socket_id)
+				continue;
+			msl = &mcfg->memsegs[msl_idx];
+			break;
+		}
+		if (!msl) {
+			RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
+			continue;
+		}
+
+		cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, mz->hugepage_sz);
+		mz_end = RTE_PTR_ADD(cur_addr, mz->len);
+
+		ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) /
+				msl->hugepage_sz;
+		ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
+
+		fprintf(f, "physical pages used:\n");
+		do {
+			fprintf(f, "  addr: %p iova: 0x%" PRIx64 " len: 0x%" PRIx64 " len: 0x%" PRIx64 "\n",
+				cur_addr, ms->iova, ms->len, ms->hugepage_sz);
+
+			/* advance VA to next page */
+			cur_addr = RTE_PTR_ADD(cur_addr, ms->hugepage_sz);
+
+			/* memzones occupy contiguous segments */
+			++ms;
+		} while (cur_addr < mz_end);
+		i++;
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
@@ -459,9 +540,11 @@ rte_eal_memzone_init(void)
 
 	rte_rwlock_write_lock(&mcfg->mlock);
 
-	/* delete all zones */
-	mcfg->memzone_cnt = 0;
-	memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
+	if (rte_fbarray_alloc(&mcfg->memzones, "memzone", 256,
+			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
+		return -1;
+	}
 
 	rte_rwlock_write_unlock(&mcfg->mlock);
 
@@ -473,14 +556,19 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 		      void *arg)
 {
 	struct rte_mem_config *mcfg;
-	unsigned i;
+	struct rte_fbarray *arr;
+	int i;
 
 	mcfg = rte_eal_get_configuration()->mem_config;
+	arr = &mcfg->memzones;
+
+	i = 0;
 
 	rte_rwlock_read_lock(&mcfg->mlock);
-	for (i=0; i<RTE_MAX_MEMZONE; i++) {
-		if (mcfg->memzone[i].addr != NULL)
-			(*func)(&mcfg->memzone[i], arg);
+	while ((i = rte_fbarray_find_next_used(arr, i)) > 0) {
+		struct rte_memzone *mz = rte_fbarray_get(arr, i);
+		(*func)(mz, arg);
+		i++;
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index c9b57a4..8f4cc34 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -86,10 +86,8 @@ struct rte_mem_config {
 	rte_rwlock_t qlock;   /**< used for tailq operation for thread safe. */
 	rte_rwlock_t mplock;  /**< only used by mempool LIB for thread-safe. */
 
-	uint32_t memzone_cnt; /**< Number of allocated memzones */
-
 	/* memory segments and zones */
-	struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
+	struct rte_fbarray memzones; /**< Memzone descriptors. */
 
 	struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];
 	/**< list of dynamic arrays holding memsegs */
-- 
2.7.4

  parent reply	other threads:[~2017-12-19 11:14 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-19 11:14 [dpdk-dev] [RFC v2 00/23] Dynamic memory allocation for DPDK Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 01/23] eal: move get_virtual_area out of linuxapp eal_memory.c Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 02/23] eal: add function to report number of detected sockets Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 03/23] eal: add rte_fbarray Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 04/23] eal: move all locking to heap Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 05/23] eal: protect malloc heap stats with a lock Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 06/23] eal: make malloc a doubly-linked list Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 07/23] eal: make malloc_elem_join_adjacent_free public Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 08/23] eal: add "single file segments" command-line option Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 09/23] eal: add "legacy memory" option Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 10/23] eal: read hugepage counts from node-specific sysfs path Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 11/23] eal: replace memseg with memseg lists Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 12/23] eal: add support for dynamic memory allocation Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 13/23] eal: make use of dynamic memory allocation for init Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 14/23] eal: add support for dynamic unmapping of pages Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 15/23] eal: add API to check if memory is physically contiguous Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 16/23] eal: enable dynamic memory allocation/free on malloc/free Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 17/23] eal: add backend support for contiguous memory allocation Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 18/23] eal: add rte_malloc support for allocating contiguous memory Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 19/23] eal: enable reserving physically contiguous memzones Anatoly Burakov
2017-12-19 11:14 ` Anatoly Burakov [this message]
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 21/23] mempool: add support for the new memory allocation methods Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 22/23] vfio: allow to map other memory regions Anatoly Burakov
2017-12-19 11:14 ` [dpdk-dev] [RFC v2 23/23] eal: map/unmap memory with VFIO when alloc/free pages Anatoly Burakov
2017-12-19 15:46 ` [dpdk-dev] [RFC v2 00/23] Dynamic memory allocation for DPDK Stephen Hemminger
2017-12-19 16:02   ` Burakov, Anatoly
2017-12-19 16:06     ` Stephen Hemminger
2017-12-19 16:09       ` Burakov, Anatoly
2017-12-21 21:38 ` Walker, Benjamin
2017-12-22  9:13   ` Burakov, Anatoly
2017-12-26 17:19     ` Walker, Benjamin
2018-02-02 19:28       ` Yongseok Koh
2018-02-05 10:03         ` Burakov, Anatoly
2018-02-05 10:18           ` Nélio Laranjeiro
2018-02-05 10:36             ` Burakov, Anatoly
2018-02-06  9:10               ` Nélio Laranjeiro
2018-02-14  2:01           ` Yongseok Koh
2018-02-14  9:32             ` Burakov, Anatoly
2018-02-14 18:13               ` Yongseok Koh
2018-01-13 14:13 ` Burakov, Anatoly
2018-01-23 22:33 ` Yongseok Koh
2018-01-25 16:18   ` Burakov, Anatoly
2018-02-14  8:04 ` Thomas Monjalon
2018-02-14 10:07   ` Burakov, Anatoly
2018-04-25 16:02     ` Burakov, Anatoly
2018-04-25 16:12       ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=69a29e4ac2822d0c4b1f6c599b428977b2b25505.1513681966.git.anatoly.burakov@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=andras.kovacs@ericsson.com \
    --cc=benjamin.walker@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=keith.wiles@intel.com \
    --cc=laszlo.vadkeri@ericsson.com \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).