From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 1718A5F29 for ; Fri, 21 Sep 2018 18:14:26 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 21 Sep 2018 09:14:25 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.54,285,1534834800"; d="scan'208";a="90821618" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga004.fm.intel.com with ESMTP; 21 Sep 2018 09:14:10 -0700 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w8LGEA7H029186; Fri, 21 Sep 2018 17:14:10 +0100 Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w8LGEAeh002781; Fri, 21 Sep 2018 17:14:10 +0100 Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w8LGEA3U002777; Fri, 21 Sep 2018 17:14:10 +0100 From: Anatoly Burakov To: dev@dpdk.org Cc: Thomas Monjalon , Bruce Richardson , laszlo.madarassy@ericsson.com, laszlo.vadkerti@ericsson.com, andras.kovacs@ericsson.com, winnie.tian@ericsson.com, daniel.andrasi@ericsson.com, janos.kobor@ericsson.com, geza.koblo@ericsson.com, srinath.mannam@broadcom.com, scott.branden@broadcom.com, ajit.khaparde@broadcom.com, keith.wiles@intel.com, shreyansh.jain@nxp.com, shahafs@mellanox.com, arybchenko@solarflare.com Date: Fri, 21 Sep 2018 17:13:52 +0100 Message-Id: <533ab6ba2bd1287b135154c1d1978a250ff5ca3a.1537546029.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH v4 03/20] malloc: index heaps using heap ID rather than NUMA node X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 21 Sep 2018 16:14:27 -0000 Switch over all parts of EAL to use heap ID instead of NUMA node ID to identify heaps. Heap ID for DPDK-internal heaps is the NUMA node's index within the detected NUMA node list. Heap ID for external heaps will be the order of their creation. Signed-off-by: Anatoly Burakov --- config/common_base | 1 + config/rte_config.h | 1 + .../common/include/rte_eal_memconfig.h | 4 +- .../common/include/rte_malloc_heap.h | 1 + lib/librte_eal/common/malloc_heap.c | 98 +++++++++++++------ lib/librte_eal/common/malloc_heap.h | 3 + lib/librte_eal/common/rte_malloc.c | 41 +++++--- 7 files changed, 106 insertions(+), 43 deletions(-) diff --git a/config/common_base b/config/common_base index 155c7d40e..b52770b27 100644 --- a/config/common_base +++ b/config/common_base @@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64 CONFIG_RTE_LIBRTE_EAL=y CONFIG_RTE_MAX_LCORE=128 CONFIG_RTE_MAX_NUMA_NODES=8 +CONFIG_RTE_MAX_HEAPS=32 CONFIG_RTE_MAX_MEMSEG_LISTS=64 # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller diff --git a/config/rte_config.h b/config/rte_config.h index 567051b9c..5dd2ac1ad 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -24,6 +24,7 @@ #define RTE_BUILD_SHARED_LIB /* EAL defines */ +#define RTE_MAX_HEAPS 32 #define RTE_MAX_MEMSEG_LISTS 128 #define RTE_MAX_MEMSEG_PER_LIST 8192 #define RTE_MAX_MEM_MB_PER_LIST 32768 diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index 6baa6854f..d7920a4e0 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -72,8 +72,8 @@ struct rte_mem_config { struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ - /* Heaps of Malloc per socket */ - struct malloc_heap 
malloc_heaps[RTE_MAX_NUMA_NODES]; + /* Heaps of Malloc */ + struct malloc_heap malloc_heaps[RTE_MAX_HEAPS]; /* address of mem_config in primary process. used to map shared config into * exact same address the primary process maps it. diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h index d43fa9097..e7ac32d42 100644 --- a/lib/librte_eal/common/include/rte_malloc_heap.h +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -27,6 +27,7 @@ struct malloc_heap { unsigned alloc_count; size_t total_size; + unsigned int socket_id; } __rte_cache_aligned; #endif /* _RTE_MALLOC_HEAP_H_ */ diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 3c8e2063b..1d1e35708 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) return check_flag & flags; } +int +malloc_socket_to_heap_id(unsigned int socket_id) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i; + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + + if (heap->socket_id == socket_id) + return i; + } + return -1; +} + /* * Expand the heap with a memory area. 
*/ @@ -93,12 +108,13 @@ malloc_add_seg(const struct rte_memseg_list *msl, struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct rte_memseg_list *found_msl; struct malloc_heap *heap; - int msl_idx; + int msl_idx, heap_idx; if (msl->external) return 0; - heap = &mcfg->malloc_heaps[msl->socket_id]; + heap_idx = malloc_socket_to_heap_id(msl->socket_id); + heap = &mcfg->malloc_heaps[heap_idx]; /* msl is const, so find it */ msl_idx = msl - mcfg->memsegs; @@ -111,6 +127,7 @@ malloc_add_seg(const struct rte_memseg_list *msl, malloc_heap_add_memory(heap, found_msl, ms->addr, len); heap->total_size += len; + heap->socket_id = msl->socket_id; RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20, msl->socket_id); @@ -561,12 +578,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket, /* this will try lower page sizes first */ static void * -heap_alloc_on_socket(const char *type, size_t size, int socket, - unsigned int flags, size_t align, size_t bound, bool contig) +malloc_heap_alloc_on_heap_id(const char *type, size_t size, + unsigned int heap_id, unsigned int flags, size_t align, + size_t bound, bool contig) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; + int socket_id; void *ret; rte_spinlock_lock(&(heap->lock)); @@ -584,12 +603,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, * we may still be able to allocate memory from appropriate page sizes, * we just need to request more memory first. */ + + socket_id = rte_socket_id_by_idx(heap_id); + /* + * if socket ID is negative, we cannot find a socket ID for this heap - + * which means it's an external heap. 
those can have unexpected page + * sizes, so if the user asked to allocate from there - assume user + * knows what they're doing, and allow allocating from there with any + * page size flags. + */ + if (socket_id < 0) + size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY; + ret = heap_alloc(heap, type, size, size_flags, align, bound, contig); if (ret != NULL) goto alloc_unlock; - if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound, - contig)) { + /* if socket ID is invalid, this is an external heap */ + if (socket_id < 0) + goto alloc_unlock; + + if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align, + bound, contig)) { ret = heap_alloc(heap, type, size, flags, align, bound, contig); /* this should have succeeded */ @@ -605,7 +640,7 @@ void * malloc_heap_alloc(const char *type, size_t size, int socket_arg, unsigned int flags, size_t align, size_t bound, bool contig) { - int socket, i, cur_socket; + int socket, heap_id, i; void *ret; /* return NULL if size is 0 or alignment is not power-of-2 */ @@ -620,22 +655,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, else socket = socket_arg; - /* Check socket parameter */ - if (socket >= RTE_MAX_NUMA_NODES) + /* turn socket ID into heap ID */ + heap_id = malloc_socket_to_heap_id(socket); + /* if heap id is negative, socket ID was invalid */ + if (heap_id < 0) return NULL; - ret = heap_alloc_on_socket(type, size, socket, flags, align, bound, - contig); + ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align, + bound, contig); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; - /* try other heaps */ + /* try other heaps. we are only iterating through native DPDK sockets, + * so external heaps won't be included. 
+ */ for (i = 0; i < (int) rte_socket_count(); i++) { - cur_socket = rte_socket_id_by_idx(i); - if (cur_socket == socket) + if (i == heap_id) continue; - ret = heap_alloc_on_socket(type, size, cur_socket, flags, - align, bound, contig); + ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align, + bound, contig); if (ret != NULL) return ret; } @@ -643,11 +681,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, } static void * -heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags, - size_t align, bool contig) +heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id, + unsigned int flags, size_t align, bool contig) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; void *ret; rte_spinlock_lock(&(heap->lock)); @@ -665,7 +703,7 @@ void * malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, size_t align, bool contig) { - int socket, i, cur_socket; + int socket, i, cur_socket, heap_id; void *ret; /* return NULL if align is not power-of-2 */ @@ -680,11 +718,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, else socket = socket_arg; - /* Check socket parameter */ - if (socket >= RTE_MAX_NUMA_NODES) + /* turn socket ID into heap ID */ + heap_id = malloc_socket_to_heap_id(socket); + /* if heap id is negative, socket ID was invalid */ + if (heap_id < 0) return NULL; - ret = heap_alloc_biggest_on_socket(type, socket, flags, align, + ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align, contig); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; @@ -694,8 +734,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, cur_socket = rte_socket_id_by_idx(i); if (cur_socket == socket) continue; - ret = heap_alloc_biggest_on_socket(type, cur_socket, flags, - align, contig); + 
ret = heap_alloc_biggest_on_heap_id(type, i, flags, align, + contig); if (ret != NULL) return ret; } @@ -760,7 +800,7 @@ malloc_heap_free(struct malloc_elem *elem) /* ...of which we can't avail if we are in legacy mode, or if this is an * externally allocated segment. */ - if (internal_config.legacy_mem || msl->external) + if (internal_config.legacy_mem || (msl->external > 0)) goto free_unlock; /* check if we can free any memory back to the system */ @@ -917,7 +957,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size) } /* - * Function to retrieve data for heap on given socket + * Function to retrieve data for a given heap */ int malloc_heap_get_stats(struct malloc_heap *heap, @@ -955,7 +995,7 @@ malloc_heap_get_stats(struct malloc_heap *heap, } /* - * Function to retrieve data for heap on given socket + * Function to retrieve data for a given heap */ void malloc_heap_dump(struct malloc_heap *heap, FILE *f) diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index f52cb5559..61b844b6f 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -46,6 +46,9 @@ malloc_heap_get_stats(struct malloc_heap *heap, void malloc_heap_dump(struct malloc_heap *heap, FILE *f); +int +malloc_socket_to_heap_id(unsigned int socket_id); + int rte_eal_malloc_heap_init(void); diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 47ca5a742..73d6df31d 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -152,11 +152,20 @@ rte_malloc_get_socket_stats(int socket, struct rte_malloc_socket_stats *socket_stats) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int heap_idx, ret = -1; - if (socket >= RTE_MAX_NUMA_NODES || socket < 0) - return -1; + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); - return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats); + heap_idx = malloc_socket_to_heap_id(socket); + 
if (heap_idx < 0) + goto unlock; + + ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], + socket_stats); +unlock: + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; } /* @@ -168,12 +177,14 @@ rte_malloc_dump_heaps(FILE *f) struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; unsigned int idx; - for (idx = 0; idx < rte_socket_count(); idx++) { - unsigned int socket = rte_socket_id_by_idx(idx); - fprintf(f, "Heap on socket %i:\n", socket); - malloc_heap_dump(&mcfg->malloc_heaps[socket], f); + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + fprintf(f, "Heap id: %u\n", idx); + malloc_heap_dump(&mcfg->malloc_heaps[idx], f); } + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); } /* @@ -182,14 +193,19 @@ rte_malloc_dump_heaps(FILE *f) void rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) { - unsigned int socket; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int heap_id; struct rte_malloc_socket_stats sock_stats; + + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + /* Iterate through all initialised heaps */ - for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) { - if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0)) - continue; + for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; - fprintf(f, "Socket:%u\n", socket); + malloc_heap_get_stats(heap, &sock_stats); + + fprintf(f, "Heap id:%u\n", heap_id); fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes); fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes); fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes); @@ -198,6 +214,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); } + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); return; } 
-- 2.17.1