From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 8BB32A0588; Thu, 16 Apr 2020 04:42:28 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id E3EE51D9AA; Thu, 16 Apr 2020 04:42:20 +0200 (CEST) Received: from git-send-mailer.rdmz.labs.mlnx (unknown [37.142.13.130]) by dpdk.org (Postfix) with ESMTP id 3056A1D996 for ; Thu, 16 Apr 2020 04:42:18 +0200 (CEST) From: Suanming Mou To: viacheslavo@mellanox.com, matan@mellanox.com Cc: rasland@mellanox.com, dev@dpdk.org Date: Thu, 16 Apr 2020 10:41:59 +0800 Message-Id: <1587004928-328077-2-git-send-email-suanmingm@mellanox.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1587004928-328077-1-git-send-email-suanmingm@mellanox.com> References: <1586740309-449310-1-git-send-email-suanmingm@mellanox.com> <1587004928-328077-1-git-send-email-suanmingm@mellanox.com> Subject: [dpdk-dev] [PATCH v2 01/10] net/mlx5: add indexed memory pool X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, every block allocated by rte_malloc() carries more than 64 bytes of overhead. This means that when 64 bytes of memory are allocated, the real memory cost may be double. The libc malloc() overhead is 16 bytes. If users allocate millions of small memory blocks, the total overhead may be huge, and saving a 64-bit pointer for each block is also quite expensive. The indexed memory pool is introduced to save memory when allocating a huge number of small memory blocks. The indexed memory pool uses trunks and bitmaps to manage the memory entries. When the pool has no free entry, a trunk slot containing an array of memory entries is allocated first. The bitmap in the trunk records the entry allocation. 
The offset of trunk slot in the pool and the offset of memory entry in the trunk slot compose the index for the memory entry. So, by the index, it will be very easy to address the memory of the entry. User saves the 32 bits index for the memory resource instead of the 64 bits pointer. User should create different pools for allocating different size of small memory block. It means one pool provides one fixed size of small memory blocked allocating.

Signed-off-by: Suanming Mou
Acked-by: Viacheslav Ovsiienko
---
 drivers/net/mlx5/mlx5_utils.c | 328 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_utils.h | 229 +++++++++++++++++++++++++++++
 2 files changed, 557 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index 4b4fc3c..4cab7f0 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -117,3 +117,331 @@ struct mlx5_hlist_entry *
 	}
 	rte_free(h);
 }
+
+/* Take the pool lock; no-op unless the pool was created with need_lock. */
+static inline void
+mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
+{
+	if (pool->cfg.need_lock)
+		rte_spinlock_lock(&pool->lock);
+}
+
+/* Release the pool lock; no-op unless the pool was created with need_lock. */
+static inline void
+mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
+{
+	if (pool->cfg.need_lock)
+		rte_spinlock_unlock(&pool->lock);
+}
+
+/*
+ * Create an indexed pool from the given configuration.
+ *
+ * NULL is returned when cfg is NULL, the entry size is zero, only one of the
+ * malloc/free callbacks is set, trunk_size is neither zero nor a power of 2,
+ * or trunk_size is too big for the TRUNK_IDX_BITS check below; it is also
+ * returned when the pool structure cannot be allocated.
+ */
+struct mlx5_indexed_pool *
+mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
+{
+	struct mlx5_indexed_pool *pool;
+
+	if (!cfg || !cfg->size || (!cfg->malloc ^ !cfg->free) ||
+	    (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
+	    ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
+		return NULL;
+	pool = rte_zmalloc("mlx5_ipool", sizeof(*pool), RTE_CACHE_LINE_SIZE);
+	if (!pool)
+		return NULL;
+	pool->cfg = *cfg;
+	/* Zero trunk size selects the default number of entries per trunk. */
+	if (!pool->cfg.trunk_size)
+		pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
+	/* Without user callbacks fall back to the rte_malloc allocator. */
+	if (!cfg->malloc && !cfg->free) {
+		pool->cfg.malloc = rte_malloc_socket;
+		pool->cfg.free = rte_free;
+	}
+	/* No trunk exists yet, so the free trunk list starts empty. */
+	pool->free_list = TRUNK_INVALID;
+	if (pool->cfg.need_lock)
+		rte_spinlock_init(&pool->lock);
+	return pool;
+}
+
+/*
+ * Add one trunk to the pool, expanding the trunk pointer array when full.
+ *
+ * Called from mlx5_ipool_malloc() with the pool lock taken and the free
+ * trunk list empty. Returns 0 on success, -ENOMEM when TRUNK_MAX_IDX trunks
+ * already exist, an allocation fails or the trunk bitmap cannot be set up.
+ */
+static int
+mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
+{
+	struct mlx5_indexed_trunk *trunk;
+	struct mlx5_indexed_trunk **trunk_tmp;
+	struct mlx5_indexed_trunk **p;
+	size_t trunk_size = 0;
+	size_t data_size;
+	size_t bmp_size;
+	uint32_t idx;
+
+	if (pool->n_trunk_valid == TRUNK_MAX_IDX)
+		return -ENOMEM;
+	if (pool->n_trunk_valid == pool->n_trunk) {
+		/* No free trunk flags, expand trunk list. */
+		int n_grow = pool->n_trunk_valid ? pool->n_trunk :
+			     RTE_CACHE_LINE_SIZE / sizeof(void *);
+
+		p = pool->cfg.malloc(pool->cfg.type,
+				 (pool->n_trunk_valid + n_grow) *
+				 sizeof(struct mlx5_indexed_trunk *),
+				 RTE_CACHE_LINE_SIZE, rte_socket_id());
+		if (!p)
+			return -ENOMEM;
+		if (pool->trunks)
+			memcpy(p, pool->trunks, pool->n_trunk_valid *
+			       sizeof(struct mlx5_indexed_trunk *));
+		memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
+		       n_grow * sizeof(void *));
+		trunk_tmp = pool->trunks;
+		pool->trunks = p;
+		/* Release the old array, not the one just installed. */
+		if (trunk_tmp)
+			pool->cfg.free(trunk_tmp);
+		pool->n_trunk += n_grow;
+	}
+	idx = pool->n_trunk_valid;
+	trunk_size += sizeof(*trunk);
+	/*
+	 * The bitmap is placed right behind the entries and rte_bitmap
+	 * refuses memory which is not cache line aligned, so pad the
+	 * entry area up to a cache line multiple.
+	 */
+	data_size = (size_t)pool->cfg.trunk_size * pool->cfg.size;
+	data_size = (data_size + RTE_CACHE_LINE_SIZE - 1) /
+		    RTE_CACHE_LINE_SIZE * RTE_CACHE_LINE_SIZE;
+	bmp_size = rte_bitmap_get_memory_footprint(pool->cfg.trunk_size);
+	trunk_size += data_size + bmp_size;
+	trunk = pool->cfg.malloc(pool->cfg.type, trunk_size,
+				 RTE_CACHE_LINE_SIZE, rte_socket_id());
+	if (!trunk)
+		return -ENOMEM;
+	/* Mark all entries as available. */
+	trunk->bmp = rte_bitmap_init_with_all_set(pool->cfg.trunk_size,
+				&trunk->data[data_size], bmp_size);
+	if (!trunk->bmp) {
+		/* Do not publish a trunk whose bitmap is unusable. */
+		pool->cfg.free(trunk);
+		return -ENOMEM;
+	}
+	pool->trunks[idx] = trunk;
+	trunk->idx = idx;
+	trunk->free = pool->cfg.trunk_size;
+	trunk->prev = TRUNK_INVALID;
+	trunk->next = TRUNK_INVALID;
+	MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
+	pool->free_list = idx;
+	pool->n_trunk_valid++;
+#ifdef POOL_DEBUG
+	pool->trunk_new++;
+	pool->trunk_avail++;
+#endif
+	return 0;
+}
+
+/*
+ * Allocate one entry from the first trunk of the free trunk list; a new
+ * trunk is added first when no trunk has a free entry. The index stored
+ * in *idx is the entry position in the pool plus one, so it is never 0.
+ */
+void *
+mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
+{
+	struct mlx5_indexed_trunk *trunk;
+	uint64_t slab = 0;
+	uint32_t iidx = 0;
+	void *p;
+
+	mlx5_ipool_lock(pool);
+	if (pool->free_list == TRUNK_INVALID) {
+		/* If no available trunks, grow new. */
+		if (mlx5_ipool_grow(pool)) {
+			mlx5_ipool_unlock(pool);
+			return NULL;
+		}
+	}
+	MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
+	trunk = pool->trunks[pool->free_list];
+	MLX5_ASSERT(trunk->free);
+	if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
+		mlx5_ipool_unlock(pool);
+		return NULL;
+	}
+	MLX5_ASSERT(slab);
+	/* The scan position plus the lowest set bit of the slab. */
+	iidx += __builtin_ctzll(slab);
+	MLX5_ASSERT(iidx != UINT32_MAX);
+	MLX5_ASSERT(iidx < pool->cfg.trunk_size);
+	rte_bitmap_clear(trunk->bmp, iidx);
+	p = &trunk->data[(size_t)iidx * pool->cfg.size];
+	iidx += trunk->idx * pool->cfg.trunk_size;
+	iidx += 1; /* non-zero index. */
+	trunk->free--;
+#ifdef POOL_DEBUG
+	pool->n_entry++;
+#endif
+	if (!trunk->free) {
+		/* Full trunk will be removed from free list in imalloc. */
+		MLX5_ASSERT(pool->free_list == trunk->idx);
+		pool->free_list = trunk->next;
+		if (trunk->next != TRUNK_INVALID)
+			pool->trunks[trunk->next]->prev = TRUNK_INVALID;
+		trunk->prev = TRUNK_INVALID;
+		trunk->next = TRUNK_INVALID;
+#ifdef POOL_DEBUG
+		pool->trunk_empty++;
+		pool->trunk_avail--;
+#endif
+	}
+	*idx = iidx;
+	mlx5_ipool_unlock(pool);
+	return p;
+}
+
+/* Same as mlx5_ipool_malloc(), but the returned entry is zero filled. */
+void *
+mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
+{
+	void *entry = mlx5_ipool_malloc(pool, idx);
+
+	if (entry)
+		memset(entry, 0, pool->cfg.size);
+	return entry;
+}
+
+/*
+ * Return the entry with the given index to the pool. Index 0, an index
+ * beyond the allocated trunks and an entry which is already free (its
+ * bitmap bit is set) are silently ignored.
+ */
+void
+mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
+{
+	struct mlx5_indexed_trunk *trunk;
+	uint32_t trunk_idx;
+
+	if (!idx)
+		return;
+	idx -= 1;
+	mlx5_ipool_lock(pool);
+	trunk_idx = idx / pool->cfg.trunk_size;
+	if (trunk_idx >= pool->n_trunk_valid)
+		goto out;
+	trunk = pool->trunks[trunk_idx];
+	if (!trunk || trunk_idx != trunk->idx ||
+	    rte_bitmap_get(trunk->bmp, idx % pool->cfg.trunk_size))
+		goto out;
+	rte_bitmap_set(trunk->bmp, idx % pool->cfg.trunk_size);
+	trunk->free++;
+	if (trunk->free == 1) {
+		/* Put into free trunk list head. */
+		MLX5_ASSERT(pool->free_list != trunk->idx);
+		trunk->next = pool->free_list;
+		trunk->prev = TRUNK_INVALID;
+		if (pool->free_list != TRUNK_INVALID)
+			pool->trunks[pool->free_list]->prev = trunk->idx;
+		pool->free_list = trunk->idx;
+#ifdef POOL_DEBUG
+		pool->trunk_empty--;
+		pool->trunk_avail++;
+#endif
+	}
+#ifdef POOL_DEBUG
+	pool->n_entry--;
+#endif
+out:
+	mlx5_ipool_unlock(pool);
+}
+
+/*
+ * Translate an entry index into the entry address. NULL is returned for
+ * index 0, an index beyond the allocated trunks and an entry which is
+ * currently free (its bitmap bit is set).
+ */
+void *
+mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
+{
+	struct mlx5_indexed_trunk *trunk;
+	void *p = NULL;
+	uint32_t trunk_idx;
+
+	if (!idx)
+		return NULL;
+	idx -= 1;
+	mlx5_ipool_lock(pool);
+	trunk_idx = idx / pool->cfg.trunk_size;
+	if (trunk_idx >= pool->n_trunk_valid)
+		goto out;
+	trunk = pool->trunks[trunk_idx];
+	if (!trunk || trunk_idx != trunk->idx ||
+	    rte_bitmap_get(trunk->bmp, idx % pool->cfg.trunk_size))
+		goto out;
+	p = &trunk->data[(size_t)(idx % pool->cfg.trunk_size) *
+			 pool->cfg.size];
+out:
+	mlx5_ipool_unlock(pool);
+	return p;
+}
+
+/*
+ * Free every trunk, the trunk pointer array and the pool itself.
+ * Always returns 0.
+ */
+int
+mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
+{
+	struct mlx5_indexed_trunk **trunks;
+	uint32_t i;
+
+	MLX5_ASSERT(pool);
+	mlx5_ipool_lock(pool);
+	trunks = pool->trunks;
+	for (i = 0; i < pool->n_trunk; i++) {
+		if (trunks[i])
+			pool->cfg.free(trunks[i]);
+	}
+	/* The pointer array exists only once the pool has grown. */
+	if (pool->trunks)
+		pool->cfg.free(pool->trunks);
+	mlx5_ipool_unlock(pool);
+	rte_free(pool);
+	return 0;
+}
+
+/*
+ * Print the pool configuration and trunk count to stdout; the allocation
+ * counters are printed as well when POOL_DEBUG is defined.
+ */
+void
+mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
+{
+	/*
+	 * NOTE(review): "total" repeats the trunk count; presumably the
+	 * total entry number was intended - confirm before changing it.
+	 */
+	printf("Pool %s entry size %u, trunks %u, %u entry per trunk, "
+	       "total: %u\n",
+	       pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
+	       pool->cfg.trunk_size, pool->n_trunk_valid);
+#ifdef POOL_DEBUG
+	/* The counters are int64_t, convert them to match %lld. */
+	printf("Pool %s entry %lld, trunk alloc %lld, empty: %lld, "
+	       "available %lld free %lld\n",
+	       pool->cfg.type, (long long)pool->n_entry,
+	       (long long)pool->trunk_new, (long long)pool->trunk_empty,
+	       (long long)pool->trunk_avail, (long long)pool->trunk_free);
+#endif
+}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index 8f305c3..e404a5c 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ 
b/drivers/net/mlx5/mlx5_utils.h
@@ -12,6 +12,10 @@
 #include
 #include
 
+#include
+#include
+#include
+
 #include
 
 #include "mlx5_defs.h"
@@ -60,6 +64,60 @@
 			(((val) & (from)) / ((from) / (to))) : \
 			(((val) & (from)) * ((to) / (from))))
 
+/*
+ * The index of an indexed memory entry is made up of the trunk index and
+ * the offset of the entry inside the trunk. The entry index is 32 bits
+ * wide, so a user who prefers small trunks can raise the macro below to
+ * let the pool contain more trunks while still allocating lots of
+ * entries.
+ */
+#define TRUNK_IDX_BITS 16
+#define TRUNK_MAX_IDX ((1 << TRUNK_IDX_BITS) - 1)
+#define TRUNK_INVALID TRUNK_MAX_IDX
+#define MLX5_IPOOL_DEFAULT_TRUNK_SIZE (1 << (28 - TRUNK_IDX_BITS))
+#ifdef RTE_LIBRTE_MLX5_DEBUG
+#define POOL_DEBUG 1
+#endif
+
+struct mlx5_indexed_pool_config {
+	uint32_t size; /* Pool entry size in bytes, must not be 0. */
+	uint32_t trunk_size;
+	/* Entries per trunk: 0 for the default, otherwise a power of 2. */
+	uint32_t need_lock;
+	/* Non-zero when the pool is used by multiple threads. */
+	const char *type; /* Type name handed to the allocator. */
+	void *(*malloc)(const char *type, size_t size, unsigned int align,
+			int socket);
+	/* User defined allocator; set it together with free or not at all. */
+	void (*free)(void *addr); /* User defined memory release. */
+};
+
+struct mlx5_indexed_trunk {
+	uint32_t idx; /* Trunk id. */
+	uint32_t prev; /* Previous free trunk in free list. */
+	uint32_t next; /* Next free trunk in free list. */
+	uint32_t free; /* Free entries available. */
+	struct rte_bitmap *bmp; /* Entry bitmap, a set bit is a free entry. */
+	uint8_t data[] __rte_cache_min_aligned; /* Entry data start. */
+};
+
+struct mlx5_indexed_pool {
+	struct mlx5_indexed_pool_config cfg; /* Indexed pool configuration. */
+	rte_spinlock_t lock; /* Pool lock for multiple thread usage. */
+	uint32_t n_trunk_valid; /* Trunks allocated. */
+	uint32_t n_trunk; /* Trunk pointer array size. */
+	/* Trunk pointer array, only the first n_trunk_valid slots are set. */
+	struct mlx5_indexed_trunk **trunks;
+	uint32_t free_list; /* First free trunk, TRUNK_INVALID if none. */
+#ifdef POOL_DEBUG
+	int64_t n_entry; /* Entries currently allocated. */
+	int64_t trunk_new; /* Trunks created. */
+	int64_t trunk_avail; /* Trunks on the free trunk list. */
+	int64_t trunk_empty; /* Trunks with no free entry left. */
+	int64_t trunk_free; /* NOTE(review): never updated in this patch. */
+#endif
+};
+
 /**
  * Return logarithm of the nearest power of two above input value.
  *
@@ -183,4 +241,175 @@ void mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
 void mlx5_hlist_destroy(struct mlx5_hlist *h,
 			mlx5_hlist_destroy_callback_fn cb, void *ctx);
 
+/**
+ * This function allocates a non-initialized memory entry from the pool.
+ * In NUMA systems, trunk memory is requested for the NUMA socket of the
+ * core whose allocation made the pool grow a new trunk.
+ *
+ * Memory entry is allocated from memory trunk, no alignment.
+ *
+ * @param pool
+ *   Pointer to indexed memory entry pool.
+ *   The pool grows a new trunk when no free entry is left.
+ * @param[out] idx
+ *   Pointer to memory to save allocated index.
+ *   The index is always a positive value; it is not written on error.
+ * @return
+ *   - Pointer to the allocated memory entry.
+ *   - NULL on error, e.g. not enough memory or trunk limit reached.
+ */
+void *mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx);
+
+/**
+ * This function allocates a zero initialized memory entry from the pool.
+ * In NUMA systems, trunk memory is requested for the NUMA socket of the
+ * core whose allocation made the pool grow a new trunk.
+ *
+ * Memory entry is allocated from memory trunk, no alignment.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ *   The pool grows a new trunk when no free entry is left.
+ * @param[out] idx
+ *   Pointer to memory to save allocated index.
+ *   The index is always a positive value; it is not written on error.
+ * @return
+ *   - Pointer to the allocated memory entry.
+ *   - NULL on error, e.g. not enough memory or trunk limit reached.
+ */
+void *mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx);
+
+/**
+ * This function frees an indexed memory entry back to the pool.
+ * Caller has to make sure that the index is allocated from same pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param idx
+ *   Allocated memory entry index; 0 and already free entries are ignored.
+ */
+void mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx);
+
+/**
+ * This function returns pointer of indexed memory entry from index.
+ * Caller has to make sure that the index is valid, and allocated
+ * from same pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param idx
+ *   Allocated memory index.
+ * @return
+ *   - Pointer to indexed memory entry, NULL if idx is 0 or not allocated.
+ */
+void *mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx);
+
+/**
+ * This function creates indexed memory pool.
+ * Caller has to configure the configuration accordingly.
+ *
+ * @param cfg
+ *   Pointer to the pool configuration; it is copied into the new pool.
+ * @return
+ *   Pointer to the created pool, NULL if cfg is rejected or on no memory.
+ */
+struct mlx5_indexed_pool *
+mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg);
+
+/**
+ * This function releases all resources of pool.
+ * Caller has to make sure that all indexes and memories allocated
+ * from this pool are not referenced anymore.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @return
+ *   - non-zero value on error.
+ *   - 0 on success.
+ */
+int mlx5_ipool_destroy(struct mlx5_indexed_pool *pool);
+
+/**
+ * This function dumps debug info of pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ */
+void mlx5_ipool_dump(struct mlx5_indexed_pool *pool);
+
+/*
+ * Macros for linked list based on indexed memory.
+ * Example data structure:
+ * struct Foo {
+ *	ILIST_ENTRY(uint16_t) next;
+ *	...
+ * }
+ * Index 0 terminates a list, so 0 in head means the list is empty.
+ */
+#define ILIST_ENTRY(type)						\
+struct {								\
+	type prev; /* Index of previous element. */			\
+	type next; /* Index of next element. */				\
+}
+
+#define ILIST_INSERT(pool, head, idx, elem, field)			\
+	do {								\
+		typeof(elem) peer;					\
+		MLX5_ASSERT((elem) && (idx));				\
+		(elem)->field.next = *(head);				\
+		(elem)->field.prev = 0;					\
+		if (*(head)) {						\
+			(peer) = mlx5_ipool_get(pool, *(head));		\
+			if (peer)					\
+				(peer)->field.prev = (idx);		\
+		}							\
+		*(head) = (idx);					\
+	} while (0)
+
+#define ILIST_REMOVE(pool, head, idx, elem, field)			\
+	do {								\
+		typeof(elem) peer;					\
+		MLX5_ASSERT(elem);					\
+		MLX5_ASSERT(head);					\
+		if ((elem)->field.prev) {				\
+			(peer) = mlx5_ipool_get				\
+				(pool, (elem)->field.prev);		\
+			if (peer)					\
+				(peer)->field.next = (elem)->field.next;\
+		}							\
+		if ((elem)->field.next) {				\
+			(peer) = mlx5_ipool_get				\
+				(pool, (elem)->field.next);		\
+			if (peer)					\
+				(peer)->field.prev = (elem)->field.prev;\
+		}							\
+		if (*(head) == (idx))					\
+			*(head) = (elem)->field.next;			\
+	} while (0)
+
+#define ILIST_FOREACH(pool, head, idx, elem, field)			\
+	for ((idx) = (head), (elem) =					\
+	     (idx) ? mlx5_ipool_get(pool, (idx)) : NULL; (elem);	\
+	     (idx) = (elem)->field.next, (elem) =			\
+	     (idx) ? mlx5_ipool_get(pool, (idx)) : NULL)
+
+/* Single index list. */
+#define SILIST_ENTRY(type)						\
+struct {								\
+	type next; /* Index of next element. */				\
+}
+
+#define SILIST_INSERT(head, idx, elem, field)				\
+	do {								\
+		MLX5_ASSERT((elem) && (idx));				\
+		(elem)->field.next = *(head);				\
+		*(head) = (idx);					\
+	} while (0)
+
+#define SILIST_FOREACH(pool, head, idx, elem, field)			\
+	for ((idx) = (head), (elem) =					\
+	     (idx) ? mlx5_ipool_get(pool, (idx)) : NULL; (elem);	\
+	     (idx) = (elem)->field.next, (elem) =			\
+	     (idx) ? mlx5_ipool_get(pool, (idx)) : NULL)
+
 #endif /* RTE_PMD_MLX5_UTILS_H_ */
-- 
1.8.3.1