DPDK patches and discussions
* [dpdk-dev] [RFC] mempool: introduce indexed memory pool
@ 2019-10-17  6:55 Xueming Li
  2019-10-17  7:13 ` Jerin Jacob
  2019-12-26 11:05 ` Olivier Matz
  0 siblings, 2 replies; 13+ messages in thread
From: Xueming Li @ 2019-10-17  6:55 UTC (permalink / raw)
  To: Olivier Matz, Andrew Rybchenko; +Cc: dev, Asaf Penso, Ori Kam

The indexed memory pool manages memory entries by index: an allocation
from the pool returns both a memory pointer and an index (ID). Users can
save the ID as a u32, or even a u16, instead of a traditional 8-byte
pointer. The memory can later be looked up in the pool, or returned to
it, by index.

The pool allocates backend memory in trunks (chunks) on demand, so the
pool size grows dynamically. A bitmap tracks entry usage within each
trunk, so the management overhead is one bit per entry; for example, a
trunk of 4032 entries needs only 504 bytes of bitmap.

Standard rte_malloc imposes a per-allocation overhead (64B) and a
minimum data size (64B). This pool aims to save that cost, as well as
the pointer size. For a scenario like creating millions of rte_flows,
each consisting of several small pieces of memory, the difference is
huge.

Like the standard memory pool, this lightweight pool only supports
fixed-size memory allocation: a separate pool should be created for
each different entry size.
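
A minimal usage sketch (illustrative only, not part of the patch;
"struct my_item" and the pool name are hypothetical, and
rte_indexed_pool.h is assumed to be included):

    /* No explicit create call is needed; a zeroed pool struct is enough. */
    struct rte_indexed_pool pool = { .need_lock = 1, .type = "my_items" };
    uint32_t idx;
    struct my_item *item;

    /* Allocate a zeroed entry; idx receives a non-zero index on success. */
    item = rte_izmalloc(&pool, sizeof(*item), &idx);
    if (item != NULL) {
            item = rte_iget(&pool, idx); /* look the entry up again by index */
            rte_ifree(&pool, idx);       /* return the entry to the pool */
    }
    rte_ipool_close(&pool);              /* release all trunks */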

To facilitate working with memory allocated by index, a set of
ILIST_XXX macros is defined to operate on entries like a regular LIST.
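
A rough sketch of combining the list macros with the pool ("struct foo",
the field name "next" and all variable names are illustrative; the
macros use assert(), so <assert.h> must be included as well):

    struct foo {
            ILIST_ENTRY(uint32_t) next; /* prev/next stored as indexes */
            uint32_t data;
    };

    struct rte_indexed_pool pool = { 0 };
    uint32_t head = 0; /* index 0 means an empty list */
    uint32_t idx, first;
    struct foo *elem, *peer;

    elem = rte_izmalloc(&pool, sizeof(*elem), &idx);
    if (elem != NULL)
            ILIST_INSERT(&pool, idx, elem, next, &head, peer);
    ILIST_FOREACH(&pool, head, idx, elem, next)
            elem->data++; /* walk the list by index */
    first = head;
    elem = rte_iget(&pool, first);
    if (elem != NULL) {
            ILIST_REMOVE(&pool, elem, first, next, &head, peer);
            rte_ifree(&pool, first);
    }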

By setting the entry size to zero, the pool can be used as a pure ID
generator.
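
For the ID-generator case, a sketch could look like this (again
illustrative only):

    struct rte_indexed_pool ids = { 0 };
    uint32_t id;

    /* Size 0: no payload, only a unique non-zero 32-bit ID is handed out. */
    if (rte_imalloc(&ids, 0, &id) != NULL)
            rte_ifree(&ids, id); /* recycle the ID */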

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 lib/librte_mempool/Makefile                |   3 +-
 lib/librte_mempool/rte_indexed_pool.c      | 289 +++++++++++++++++++++
 lib/librte_mempool/rte_indexed_pool.h      | 224 ++++++++++++++++
 lib/librte_mempool/rte_mempool_version.map |   7 +-
 4 files changed, 521 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_mempool/rte_indexed_pool.c
 create mode 100644 lib/librte_mempool/rte_indexed_pool.h

diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 20bf63fbca..343e945336 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -21,7 +21,8 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool.c
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_ops.c
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_ops_default.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_indexed_pool.c
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h rte_indexed_pool.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mempool/rte_indexed_pool.c b/lib/librte_mempool/rte_indexed_pool.c
new file mode 100644
index 0000000000..b9069a6555
--- /dev/null
+++ b/lib/librte_mempool/rte_indexed_pool.c
@@ -0,0 +1,289 @@
+#include "rte_indexed_pool.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+
+
+/* Clear the lowest set bit; return 1 if the bitmap is empty after the clear. */
+static int
+map_clear_any(uint64_t *map, int n, uint32_t *idx)
+{
+	int row, col;
+
+	*idx = UINT32_MAX;
+	/* Locate free entry in trunk */
+	for (row = 0; row < n / 64; row++) {
+		if (*idx != UINT32_MAX && map[row])
+			return 0;
+		if (!map[row])
+			continue;
+		col = __builtin_ctzll(map[row]);
+		*idx = row * 64 + col;
+		map[row] &= ~(1LU << col);
+		if (map[row])
+			return 0;
+	}
+	return 1;
+}
+
+/* Return 1 if all zero. */
+static inline int __rte_unused
+map_is_empty(uint64_t *map, int n)
+{
+	int row;
+
+	for (row = 0; row < n / 64; row++) {
+		if (map[row])
+			return 0;
+	}
+	return 1;
+}
+
+
+static inline void __rte_unused
+map_clear(uint64_t *map, uint32_t idx)
+{
+	int row = idx / 64;
+	int col = idx % 64;
+
+	map[row] &= ~(1LU << col);
+}
+
+static inline void
+map_set(uint64_t *map, uint32_t idx)
+{
+	int row = idx / 64;
+	int col = idx % 64;
+
+	map[row] |= (1LU << col);
+}
+
+static inline uint64_t
+map_get(uint64_t *map, uint32_t idx)
+{
+	int row = idx / 64;
+	int col = idx % 64;
+
+	return map[row] & (1LU << col);
+}
+
+static inline void
+rte_ipool_init(struct rte_indexed_pool *pool, size_t size)
+{
+	pool->size = size;
+	pool->first_free = TRUNK_INVALID;
+	if (!pool->malloc)
+		pool->malloc = rte_malloc_socket;
+	if (!pool->free)
+		pool->free = rte_free;
+	rte_spinlock_init(&pool->lock);
+}
+
+static int
+rte_ipool_grow(struct rte_indexed_pool *pool)
+{
+	struct rte_indexed_trunk *trunk;
+	void *p;
+
+	if (pool->n_trunk == UINT16_MAX)
+		return -ENOMEM;
+	if (pool->n_trunk == pool->n_trunk_list) {
+		/* No free trunk flags, expand trunk list. */
+		int n_grow = pool->n_trunk ? pool->n_trunk :
+			     RTE_CACHE_LINE_SIZE / sizeof(void *);
+		/* rte_realloc is buggy; allocate a new array and copy by hand. */
+		p = pool->malloc(pool->type, (pool->n_trunk + n_grow) * 8,
+				 RTE_CACHE_LINE_SIZE, rte_socket_id());
+		if (!p)
+			return -ENOMEM;
+		if (pool->trunks) {
+			memcpy(p, pool->trunks, pool->n_trunk * 8);
+			pool->free(pool->trunks);
+		}
+		memset(RTE_PTR_ADD(p, pool->n_trunk * 8), 0, n_grow * 8);
+		pool->trunks = p;
+		pool->n_trunk_list += n_grow;
+	}
+	assert(pool->trunks[pool->n_trunk] == NULL);
+	trunk = pool->malloc(pool->type,
+			     sizeof(*trunk) + pool->size * IDX_POOL_TRUNK_ENTRY,
+			     RTE_CACHE_LINE_SIZE, rte_socket_id());
+	if (!trunk)
+		return -ENOMEM;
+	pool->trunks[pool->n_trunk] = trunk;
+	trunk->idx = pool->n_trunk;
+	trunk->open = 1;
+	trunk->next = TRUNK_INVALID;
+	assert(pool->first_free == TRUNK_INVALID);
+	pool->first_free = pool->n_trunk;
+	/* Mark all entries as available. */
+	memset(trunk->free, 0xff, sizeof(trunk->free));
+	pool->n_trunk++;
+#ifdef POOL_DEBUG
+	pool->trunk_new++;
+	pool->trunk_avail++;
+#endif
+	return 0;
+}
+
+void *
+rte_imalloc(struct rte_indexed_pool *pool, size_t size, uint32_t *idx)
+{
+	struct rte_indexed_trunk *trunk;
+	int empty;
+	void *p;
+
+	if (!pool->trunks)
+		rte_ipool_init(pool, size);
+	else if (pool->size != size) {
+		RTE_LOG(ERR, MEMPOOL,
+			"Trying to alloc different size from pool\n");
+		return NULL;
+	}
+	if (pool->need_lock)
+		rte_spinlock_lock(&pool->lock);
+	while (1) {
+		if (pool->first_free == TRUNK_INVALID) {
+			/* If no available trunks, grow new. */
+			if (rte_ipool_grow(pool)) {
+				if (pool->need_lock)
+					rte_spinlock_unlock(&pool->lock);
+				return NULL;
+			}
+			trunk = pool->trunks[pool->first_free];
+			break;
+		}
+		trunk = pool->trunks[pool->first_free];
+		if (trunk->open)
+			break;
+		/* Evict full used trunk from free list. */
+		pool->first_free = trunk->next;
+		trunk->next = TRUNK_INVALID;
+	}
+	assert(pool->first_free != TRUNK_INVALID);
+	assert(pool->first_free < pool->n_trunk);
+	assert(trunk->open);
+	empty = map_clear_any(trunk->free, IDX_POOL_TRUNK_ENTRY, idx);
+	assert(*idx != UINT32_MAX);
+	assert(*idx < IDX_POOL_TRUNK_ENTRY);
+	p = &trunk->data[*idx * pool->size];
+	*idx += IDX_POOL_TRUNK_ENTRY * trunk->idx;
+	*idx += 1; /* non-zero index. */
+#ifdef POOL_DEBUG
+	pool->n_entry++;
+#endif
+	if (empty) {
+		/* Full trunk is evicted from the free list by a later rte_imalloc(). */
+		trunk->open = 0;
+#ifdef POOL_DEBUG
+		pool->trunk_empty++;
+		pool->trunk_avail--;
+#endif
+	}
+	if (pool->need_lock)
+		rte_spinlock_unlock(&pool->lock);
+	return p;
+}
+
+void *
+rte_izmalloc(struct rte_indexed_pool *pool, size_t size, uint32_t *idx)
+{
+	void *entry = rte_imalloc(pool, size, idx);
+
+	if (entry)
+		memset(entry, 0, size);
+	return entry;
+}
+
+void
+rte_ifree(struct rte_indexed_pool *pool, uint32_t idx)
+{
+	struct rte_indexed_trunk *trunk;
+
+	if (!idx)
+		return;
+	idx -= 1;
+	if (pool->need_lock)
+		rte_spinlock_lock(&pool->lock);
+	trunk = pool->trunks[idx / IDX_POOL_TRUNK_ENTRY];
+	assert(trunk);
+	assert(trunk->idx == idx / IDX_POOL_TRUNK_ENTRY);
+	map_set(trunk->free, idx % IDX_POOL_TRUNK_ENTRY);
+	if (!trunk->open) {
+		/* Trunk becomes available again; put it back on the free trunk list. */
+		trunk->next = pool->first_free;
+		pool->first_free = trunk->idx;
+		trunk->open = 1;
+#ifdef POOL_DEBUG
+		pool->trunk_empty--;
+		pool->trunk_avail++;
+#endif
+	}
+#ifdef POOL_DEBUG
+	pool->n_entry--;
+#endif
+	if (pool->need_lock)
+		rte_spinlock_unlock(&pool->lock);
+}
+
+void *
+rte_iget(struct rte_indexed_pool *pool, uint32_t idx)
+{
+	struct rte_indexed_trunk *trunk;
+	void *p = NULL;
+
+	if (!idx)
+		return NULL;
+	if (pool->need_lock)
+		rte_spinlock_lock(&pool->lock);
+	idx -= 1;
+	trunk = pool->trunks[idx / IDX_POOL_TRUNK_ENTRY];
+	assert(trunk);
+	assert(trunk->idx == idx / IDX_POOL_TRUNK_ENTRY);
+	if (trunk) {
+		assert(!map_get(trunk->free, idx % IDX_POOL_TRUNK_ENTRY));
+		p = &trunk->data[(idx % IDX_POOL_TRUNK_ENTRY) * pool->size];
+	}
+	if (pool->need_lock)
+		rte_spinlock_unlock(&pool->lock);
+	return p;
+}
+
+int
+rte_ipool_close(struct rte_indexed_pool *pool)
+{
+	struct rte_indexed_trunk **trunks;
+	int i;
+
+	assert(pool);
+	if (!pool->trunks)
+		return 0;
+	if (pool->need_lock)
+		rte_spinlock_lock(&pool->lock);
+	trunks = pool->trunks;
+	for (i = 0; i < pool->n_trunk_list; i++) {
+		if (trunks[i])
+			pool->free(trunks[i]);
+	}
+	pool->free(pool->trunks);
+	memset(pool, 0, sizeof(*pool));
+	return 0;
+}
+
+void
+rte_ipool_dump(struct rte_indexed_pool *pool, const char *name)
+{
+	printf("Pool %s entry size %u, trunks %hu, %d entries per trunk, total: %d\n",
+	       name, pool->size, pool->n_trunk, IDX_POOL_TRUNK_ENTRY,
+	       pool->n_trunk * IDX_POOL_TRUNK_ENTRY);
+#ifdef POOL_DEBUG
+	printf("Pool %s entry %ld, trunk alloc %ld, empty: %ld, available %ld free %ld\n",
+	       name, pool->n_entry, pool->trunk_new, pool->trunk_empty,
+	       pool->trunk_avail, pool->trunk_free);
+#endif
+}
diff --git a/lib/librte_mempool/rte_indexed_pool.h b/lib/librte_mempool/rte_indexed_pool.h
new file mode 100644
index 0000000000..73640c5a1b
--- /dev/null
+++ b/lib/librte_mempool/rte_indexed_pool.h
@@ -0,0 +1,224 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2018 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Mellanox nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_INDEXED_POOL_H_
+#define RTE_INDEXED_POOL_H_
+
+#include <unistd.h>
+#include <stdint.h>
+
+#include <rte_spinlock.h>
+#include <rte_memory.h>
+
+#define IDX_POOL_TRUNK_ENTRY (63 * 64)
+#define TRUNK_INVALID UINT16_MAX
+#define POOL_DEBUG 1
+
+struct rte_indexed_trunk {
+	uint64_t free[IDX_POOL_TRUNK_ENTRY / 64]; /* map of free entries. */
+	uint16_t idx; /* Trunk id. */
+	uint16_t next; /* Next free trunk in free list. */
+	uint8_t open; /* Has free entries; enlisted in the free trunk list. */
+	uint8_t data[] __rte_cache_min_aligned; /* Entry data start. */
+};
+
+struct rte_indexed_pool {
+	rte_spinlock_t lock;
+	uint32_t size; /* Entry size. */
+	uint16_t n_trunk; /* Trunks allocated. */
+	uint16_t n_trunk_list; /* Trunk pointer array size. */
+	/* Array of trunk pointers. */
+	struct rte_indexed_trunk **trunks;
+	uint16_t first_free; /* Index to first free trunk. */
+	int need_lock:1;
+	int no_trunk_free:1;
+	const char *type;
+	void *(*malloc)(const char *type, size_t size, unsigned int align,
+			int socket);
+	void (*free)(void *addr);
+#ifdef POOL_DEBUG
+	int64_t n_entry;
+	int64_t trunk_new;
+	int64_t trunk_avail;
+	int64_t trunk_empty;
+	int64_t trunk_free;
+#endif
+};
+
+/**
+ * This function allocates uninitialized memory from the pool.
+ * In NUMA systems, the memory allocated resides on the same
+ * NUMA socket as the core that calls this function.
+ *
+ * Memory is allocated from a memory trunk; no alignment is guaranteed.
+ * If size 0 is specified, the pool essentially acts as a unique ID generator.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ *   No initialization required.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ *   The size has to be the same in each allocation request to the same pool.
+ * @param[out] idx
+ *   Pointer to memory where the allocated index is stored.
+ *   The index is always a positive (non-zero) value.
+ * @return
+ *   - Pointer to the allocated memory
+ *   - NULL on error. Not enough memory, or invalid arguments.
+ */
+__rte_experimental
+void *rte_imalloc(struct rte_indexed_pool *pool, size_t size, uint32_t *idx);
+
+/**
+ * This function allocates zero-initialized memory from the pool.
+ * In NUMA systems, the memory allocated resides on the same
+ * NUMA socket as the core that calls this function.
+ *
+ * Memory is allocated from a memory trunk; no alignment is guaranteed.
+ * If size 0 is specified, the pool essentially acts as a unique ID generator.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ *   No initialization required.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ *   The size has to be the same in each allocation request to the same pool.
+ * @param[out] idx
+ *   Pointer to memory where the allocated index is stored.
+ *   The index is always a positive (non-zero) value.
+ * @return
+ *   - Pointer to the allocated memory
+ *   - NULL on error. Not enough memory, or invalid arguments.
+ */
+__rte_experimental
+void *rte_izmalloc(struct rte_indexed_pool *pool, size_t size, uint32_t *idx);
+
+/**
+ * This function frees indexed memory back to the pool.
+ * The caller has to make sure that the index was allocated from the same pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param idx
+ *   Allocated memory index.
+ */
+__rte_experimental
+void rte_ifree(struct rte_indexed_pool *pool, uint32_t idx);
+
+/**
+ * This function returns the memory pointer for a given index.
+ * The caller has to make sure that the index was allocated from the same pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param idx
+ *   Index of the allocated memory.
+ * @return
+ *   - Pointer to indexed memory.
+ *   - NULL if the index is not found.
+ */
+__rte_experimental
+void *rte_iget(struct rte_indexed_pool *pool, uint32_t idx);
+
+/**
+ * This function releases all resources of the pool.
+ * The caller has to make sure that no index or memory allocated
+ * from this pool is referenced anymore.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @return
+ *   - non-zero value on error.
+ *   - 0 on success.
+ */
+__rte_experimental
+int rte_ipool_close(struct rte_indexed_pool *pool);
+
+/**
+ * This function dumps debug info of the pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ */
+__rte_experimental
+void rte_ipool_dump(struct rte_indexed_pool *pool, const char *name);
+
+/*
+ * Macros for linked list based on indexed memory.
+ * Example data structure:
+ * struct Foo {
+ *	ILIST_ENTRY(uint16_t) next;
+ *	...
+ * }
+ *
+ */
+#define ILIST_ENTRY(type)						\
+struct {								\
+	type prev; /* Index of previous element. */			\
+	type next; /* Index of next element. */				\
+}
+
+#define ILIST_INSERT(pool, idx, elem, field, head, peer)		\
+	do {								\
+		assert(elem && idx);					\
+		elem->field.next = *head;				\
+		elem->field.prev = 0;					\
+		if (*head) {						\
+			peer = rte_iget(pool, *head);			\
+			peer->field.prev = idx;				\
+		}							\
+		*head = idx;						\
+	} while (0)
+
+#define ILIST_REMOVE(pool, elem, idx, field, head, peer)		\
+	do {								\
+		assert(elem);						\
+		assert(head);						\
+		if (elem->field.prev) {					\
+			peer = rte_iget(pool, elem->field.prev);	\
+			peer->field.next = elem->field.next;		\
+		}							\
+		if (elem->field.next) {					\
+			peer = rte_iget(pool, elem->field.next);	\
+			peer->field.prev = elem->field.prev;		\
+		}							\
+		if (*head == idx)					\
+			*head = elem->field.next;			\
+	} while (0)
+
+#define ILIST_FOREACH(pool, head, idx, elem, field)			\
+	for (idx = head, elem = rte_iget(pool, idx);			\
+	     elem;							\
+	     idx = elem->field.next, elem = rte_iget(pool, idx))
+
+#endif /* RTE_INDEXED_POOL_H_ */
+
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 17cbca4607..3b264189be 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -41,7 +41,6 @@ DPDK_17.11 {
 	global:
 
 	rte_mempool_populate_iova;
-
 } DPDK_16.07;
 
 DPDK_18.05 {
@@ -57,4 +56,10 @@ EXPERIMENTAL {
 	global:
 
 	rte_mempool_ops_get_info;
+	rte_imalloc;
+	rte_izmalloc;
+	rte_ifree;
+	rte_iget;
+	rte_ipool_close;
+	rte_ipool_dump;
 };
-- 
2.17.1


