From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from proxy.6wind.com (host.76.145.23.62.rev.coltfrance.com [62.23.145.76]) by dpdk.org (Postfix) with ESMTP id 08B0C1BBE for ; Thu, 14 Apr 2016 12:20:25 +0200 (CEST) Received: from glumotte.dev.6wind.com (unknown [10.16.0.195]) by proxy.6wind.com (Postfix) with ESMTP id 4C5C828F42; Thu, 14 Apr 2016 12:19:41 +0200 (CEST) From: Olivier Matz To: dev@dpdk.org Cc: bruce.richardson@intel.com, stephen@networkplumber.org Date: Thu, 14 Apr 2016 12:19:44 +0200 Message-Id: <1460629199-32489-22-git-send-email-olivier.matz@6wind.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1460629199-32489-1-git-send-email-olivier.matz@6wind.com> References: <1457540381-20274-1-git-send-email-olivier.matz@6wind.com> <1460629199-32489-1-git-send-email-olivier.matz@6wind.com> Subject: [dpdk-dev] [PATCH 21/36] mempool: default allocation in several memory chunks X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 14 Apr 2016 10:20:25 -0000 Introduce rte_mempool_populate_default() which allocates mempool objects in several memzones. The mempool header is now always allocated in a specific memzone (not with its objects). Thanks to this modification, we can remove many specific behavior that was required when hugepages are not enabled in case we are using rte_mempool_xmem_create(). This change requires to update how kni and mellanox drivers lookup for mbuf memory. For now, this will only work if there is only one memory chunk (like today), but we could make use of rte_mempool_mem_iter() to support more memory chunks. We can also remove RTE_MEMPOOL_OBJ_NAME that is not required anymore for the lookup, as memory chunks are referenced by the mempool. Note that rte_mempool_create() is still broken (it was the case before) when there is no hugepages support (rte_mempool_create_xmem() has to be used). This is fixed in next commit. Signed-off-by: Olivier Matz (cherry picked from commit e2ccba488aec7bfa5f06c12b4f7b771134255296) Signed-off-by: Olivier Matz --- drivers/net/mlx4/mlx4.c | 87 ++++++++++++++++++++++--- drivers/net/mlx5/mlx5_rxtx.c | 87 ++++++++++++++++++++++--- drivers/net/mlx5/mlx5_rxtx.h | 2 +- lib/librte_kni/rte_kni.c | 12 +++- lib/librte_mempool/rte_dom0_mempool.c | 2 +- lib/librte_mempool/rte_mempool.c | 119 +++++++++++++++++++--------------- lib/librte_mempool/rte_mempool.h | 11 ---- 7 files changed, 233 insertions(+), 87 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index 089bbec..c8481a7 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -1198,8 +1198,71 @@ txq_complete(struct txq *txq) return 0; } +struct mlx4_check_mempool_data { + int ret; + char *start; + char *end; +}; + +/* Called by mlx4_check_mempool() when iterating the memory chunks. */ +static void mlx4_check_mempool_cb(struct rte_mempool *mp, + void *opaque, struct rte_mempool_memhdr *memhdr, + unsigned mem_idx) +{ + struct mlx4_check_mempool_data *data = opaque; + + (void)mp; + (void)mem_idx; + + /* It already failed, skip the next chunks. */ + if (data->ret != 0) + return; + /* It is the first chunk. 
*/ + if (data->start == NULL && data->end == NULL) { + data->start = memhdr->addr; + data->end = data->start + memhdr->len; + return; + } + if (data->end == memhdr->addr) { + data->end += memhdr->len; + return; + } + if (data->start == (char *)memhdr->addr + memhdr->len) { + data->start -= memhdr->len; + return; + } + /* Error, mempool is not virtually contigous. */ + data->ret = -1; +} + +/** + * Check if a mempool can be used: it must be virtually contiguous. + * + * @param[in] mp + * Pointer to memory pool. + * @param[out] start + * Pointer to the start address of the mempool virtual memory area + * @param[out] end + * Pointer to the end address of the mempool virtual memory area + * + * @return + * 0 on success (mempool is virtually contiguous), -1 on error. + */ +static int mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start, + uintptr_t *end) +{ + struct mlx4_check_mempool_data data; + + memset(&data, 0, sizeof(data)); + rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data); + *start = (uintptr_t)data.start; + *end = (uintptr_t)data.end; + + return data.ret; +} + /* For best performance, this function should not be inlined. */ -static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *) +static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, struct rte_mempool *) __attribute__((noinline)); /** @@ -1214,15 +1277,21 @@ static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *) * Memory region pointer, NULL in case of error. */ static struct ibv_mr * -mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp) +mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp) { const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - uintptr_t start = mp->elt_va_start; - uintptr_t end = mp->elt_va_end; + uintptr_t start; + uintptr_t end; unsigned int i; + if (mlx4_check_mempool(mp, &start, &end) != 0) { + ERROR("mempool %p: not virtually contiguous", + (void *)mp); + return NULL; + } + DEBUG("mempool %p area start=%p end=%p size=%zu", - (const void *)mp, (void *)start, (void *)end, + (void *)mp, (void *)start, (void *)end, (size_t)(end - start)); /* Round start and end to page boundary if found in memory segments. */ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { @@ -1236,7 +1305,7 @@ mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp) end = RTE_ALIGN_CEIL(end, align); } DEBUG("mempool %p using start=%p end=%p size=%zu for MR", - (const void *)mp, (void *)start, (void *)end, + (void *)mp, (void *)start, (void *)end, (size_t)(end - start)); return ibv_reg_mr(pd, (void *)start, @@ -1276,7 +1345,7 @@ txq_mb2mp(struct rte_mbuf *buf) * mr->lkey on success, (uint32_t)-1 on failure. */ static uint32_t -txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) +txq_mp2mr(struct txq *txq, struct rte_mempool *mp) { unsigned int i; struct ibv_mr *mr; @@ -1294,7 +1363,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) } /* Add a new entry, register MR first. 
*/ DEBUG("%p: discovered new memory pool \"%s\" (%p)", - (void *)txq, mp->name, (const void *)mp); + (void *)txq, mp->name, (void *)mp); mr = mlx4_mp2mr(txq->priv->pd, mp); if (unlikely(mr == NULL)) { DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", @@ -1315,7 +1384,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) txq->mp2mr[i].mr = mr; txq->mp2mr[i].lkey = mr->lkey; DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32, - (void *)txq, mp->name, (const void *)mp, txq->mp2mr[i].lkey); + (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey); return txq->mp2mr[i].lkey; } diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index afd4338..d1e57d2 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -140,8 +140,71 @@ txq_complete(struct txq *txq) return 0; } +struct mlx5_check_mempool_data { + int ret; + char *start; + char *end; +}; + +/* Called by mlx5_check_mempool() when iterating the memory chunks. */ +static void mlx5_check_mempool_cb(struct rte_mempool *mp, + void *opaque, struct rte_mempool_memhdr *memhdr, + unsigned mem_idx) +{ + struct mlx5_check_mempool_data *data = opaque; + + (void)mp; + (void)mem_idx; + + /* It already failed, skip the next chunks. */ + if (data->ret != 0) + return; + /* It is the first chunk. */ + if (data->start == NULL && data->end == NULL) { + data->start = memhdr->addr; + data->end = data->start + memhdr->len; + return; + } + if (data->end == memhdr->addr) { + data->end += memhdr->len; + return; + } + if (data->start == (char *)memhdr->addr + memhdr->len) { + data->start -= memhdr->len; + return; + } + /* Error, mempool is not virtually contigous. */ + data->ret = -1; +} + +/** + * Check if a mempool can be used: it must be virtually contiguous. + * + * @param[in] mp + * Pointer to memory pool. + * @param[out] start + * Pointer to the start address of the mempool virtual memory area + * @param[out] end + * Pointer to the end address of the mempool virtual memory area + * + * @return + * 0 on success (mempool is virtually contiguous), -1 on error. + */ +static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, + uintptr_t *end) +{ + struct mlx5_check_mempool_data data; + + memset(&data, 0, sizeof(data)); + rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data); + *start = (uintptr_t)data.start; + *end = (uintptr_t)data.end; + + return data.ret; +} + /* For best performance, this function should not be inlined. */ -struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *) +struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *) __attribute__((noinline)); /** @@ -156,15 +219,21 @@ struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *) * Memory region pointer, NULL in case of error. */ struct ibv_mr * -mlx5_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp) +mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp) { const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - uintptr_t start = mp->elt_va_start; - uintptr_t end = mp->elt_va_end; + uintptr_t start; + uintptr_t end; unsigned int i; + if (mlx5_check_mempool(mp, &start, &end) != 0) { + ERROR("mempool %p: not virtually contiguous", + (void *)mp); + return NULL; + } + DEBUG("mempool %p area start=%p end=%p size=%zu", - (const void *)mp, (void *)start, (void *)end, + (void *)mp, (void *)start, (void *)end, (size_t)(end - start)); /* Round start and end to page boundary if found in memory segments. 
*/ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { @@ -178,7 +247,7 @@ mlx5_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp) end = RTE_ALIGN_CEIL(end, align); } DEBUG("mempool %p using start=%p end=%p size=%zu for MR", - (const void *)mp, (void *)start, (void *)end, + (void *)mp, (void *)start, (void *)end, (size_t)(end - start)); return ibv_reg_mr(pd, (void *)start, @@ -218,7 +287,7 @@ txq_mb2mp(struct rte_mbuf *buf) * mr->lkey on success, (uint32_t)-1 on failure. */ static uint32_t -txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) +txq_mp2mr(struct txq *txq, struct rte_mempool *mp) { unsigned int i; struct ibv_mr *mr; @@ -236,7 +305,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) } /* Add a new entry, register MR first. */ DEBUG("%p: discovered new memory pool \"%s\" (%p)", - (void *)txq, mp->name, (const void *)mp); + (void *)txq, mp->name, (void *)mp); mr = mlx5_mp2mr(txq->priv->pd, mp); if (unlikely(mr == NULL)) { DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", @@ -257,7 +326,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp) txq->mp2mr[i].mr = mr; txq->mp2mr[i].lkey = mr->lkey; DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32, - (void *)txq, mp->name, (const void *)mp, txq->mp2mr[i].lkey); + (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey); return txq->mp2mr[i].lkey; } diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index db054d6..d522f70 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -341,7 +341,7 @@ uint16_t mlx5_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts, /* mlx5_rxtx.c */ -struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *); +struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *); void txq_mp2mr_iter(struct rte_mempool *, void *); uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_rx_burst_sp(void *, struct rte_mbuf **, uint16_t); diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index ea9baf4..3028fd4 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -323,6 +323,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool, char intf_name[RTE_KNI_NAMESIZE]; char mz_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; + const struct rte_mempool *mp; struct rte_kni_memzone_slot *slot = NULL; if (!pktmbuf_pool || !conf || !conf->name[0]) @@ -415,12 +416,17 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool, /* MBUF mempool */ - snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, + snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, pktmbuf_pool->name); mz = rte_memzone_lookup(mz_name); KNI_MEM_CHECK(mz == NULL); - dev_info.mbuf_va = mz->addr; - dev_info.mbuf_phys = mz->phys_addr; + mp = (struct rte_mempool *)mz->addr; + /* KNI currently requires to have only one memory chunk */ + if (mp->nb_mem_chunks != 1) + goto kni_fail; + + dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr; + dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr; ctx->pktmbuf_pool = pktmbuf_pool; ctx->group_id = conf->group_id; ctx->slot_id = slot->id; diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c index 0051bd5..dad755c 100644 --- a/lib/librte_mempool/rte_dom0_mempool.c +++ b/lib/librte_mempool/rte_dom0_mempool.c @@ -110,7 +110,7 @@ rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size, if (pa == NULL) return mp; - snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name); + 
snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT "_elt", name); mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags); if (mz == NULL) { free(pa); diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index edf26ae..5b21d0a 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -389,7 +389,7 @@ rte_mempool_ring_create(struct rte_mempool *mp) } /* free a memchunk allocated with rte_memzone_reserve() */ -__rte_unused static void +static void rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr, void *opaque) { @@ -507,6 +507,59 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr, return cnt; } +/* Default function to populate the mempool: allocate memory in mezones, + * and populate them. Return the number of objects added, or a negative + * value on error. */ +static int rte_mempool_populate_default(struct rte_mempool *mp) +{ + int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + size_t size, total_elt_sz, align; + unsigned mz_id, n; + int ret; + + /* mempool must not be populated */ + if (mp->nb_mem_chunks != 0) + return -EEXIST; + + align = RTE_CACHE_LINE_SIZE; + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) { + size = rte_mempool_xmem_size(n, total_elt_sz, 0); + + ret = snprintf(mz_name, sizeof(mz_name), + RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id); + if (ret < 0 || ret >= (int)sizeof(mz_name)) { + ret = -ENAMETOOLONG; + goto fail; + } + + mz = rte_memzone_reserve_aligned(mz_name, size, + mp->socket_id, mz_flags, align); + /* not enough memory, retry with the biggest zone we have */ + if (mz == NULL) + mz = rte_memzone_reserve_aligned(mz_name, 0, + mp->socket_id, mz_flags, align); + if (mz == NULL) { + ret = -rte_errno; + goto fail; + } + + ret = rte_mempool_populate_phys(mp, mz->addr, mz->phys_addr, + mz->len, rte_mempool_memchunk_mz_free, + (void *)(uintptr_t)mz); + if (ret < 0) + goto fail; + } + + return mp->size; + + fail: + rte_mempool_free_memchunks(mp); + return ret; +} + /* * Create the mempool over already allocated chunk of memory. * That external memory buffer can consists of physically disjoint pages. @@ -526,13 +579,10 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, struct rte_mempool_list *mempool_list; struct rte_mempool *mp = NULL; struct rte_tailq_entry *te = NULL; - const struct rte_memzone *mz; + const struct rte_memzone *mz = NULL; size_t mempool_size; int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; - void *obj; struct rte_mempool_objsz objsz; - void *startaddr; - int page_size = getpagesize(); int ret; /* compilation-time checks */ @@ -587,16 +637,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, private_data_size = (private_data_size + RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK); - if (! 
rte_eal_has_hugepages()) { - /* - * expand private data size to a whole page, so that the - * first pool element will start on a new standard page - */ - int head = sizeof(struct rte_mempool); - int new_size = (private_data_size + head) % page_size; - if (new_size) - private_data_size += page_size - new_size; - } /* try to allocate tailq entry */ te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0); @@ -613,17 +653,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, mempool_size = MEMPOOL_HEADER_SIZE(mp, cache_size); mempool_size += private_data_size; mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN); - if (vaddr == NULL) - mempool_size += (size_t)objsz.total_size * n; - - if (! rte_eal_has_hugepages()) { - /* - * we want the memory pool to start on a page boundary, - * because pool elements crossing page boundaries would - * result in discontiguous physical addresses - */ - mempool_size += page_size; - } snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name); @@ -631,20 +660,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, if (mz == NULL) goto exit_unlock; - if (rte_eal_has_hugepages()) { - startaddr = (void*)mz->addr; - } else { - /* align memory pool start address on a page boundary */ - unsigned long addr = (unsigned long)mz->addr; - if (addr & (page_size - 1)) { - addr += page_size; - addr &= ~(page_size - 1); - } - startaddr = (void*)addr; - } - /* init the mempool structure */ - mp = startaddr; memset(mp, 0, sizeof(*mp)); snprintf(mp->name, sizeof(mp->name), "%s", name); mp->phys_addr = mz->phys_addr; @@ -675,22 +691,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, mp_init(mp, mp_init_arg); /* mempool elements allocated together with mempool */ - if (vaddr == NULL) { - /* calculate address of the first element for continuous mempool. */ - obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, cache_size) + - private_data_size; - obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN); - - ret = rte_mempool_populate_phys(mp, obj, - mp->phys_addr + ((char *)obj - (char *)mp), - objsz.total_size * n, NULL, NULL); - if (ret != (int)mp->size) - goto exit_unlock; - } else { + if (vaddr == NULL) + ret = rte_mempool_populate_default(mp); + else ret = rte_mempool_populate_phys_tab(mp, vaddr, paddr, pg_num, pg_shift, NULL, NULL); - if (ret != (int)mp->size) - goto exit_unlock; + if (ret < 0) { + rte_errno = -ret; + goto exit_unlock; + } else if (ret != (int)mp->size) { + rte_errno = EINVAL; + goto exit_unlock; } /* call the initializer */ @@ -713,6 +724,8 @@ exit_unlock: rte_ring_free(mp->ring); } rte_free(te); + if (mz != NULL) + rte_memzone_free(mz); return NULL; } diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 38e5abd..a579953 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -124,17 +124,6 @@ struct rte_mempool_objsz { /* "MP_" */ #define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s" -#ifdef RTE_LIBRTE_XEN_DOM0 - -/* "_MP_elt" */ -#define RTE_MEMPOOL_OBJ_NAME "%s_" RTE_MEMPOOL_MZ_PREFIX "elt" - -#else - -#define RTE_MEMPOOL_OBJ_NAME RTE_MEMPOOL_MZ_FORMAT - -#endif /* RTE_LIBRTE_XEN_DOM0 */ - #define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1) /** Mempool over one chunk of physically continuous memory */ -- 2.1.4
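
For illustration only, not part of the patch above: the commit message notes that the KNI (and mlx) lookups still assume a single memory chunk, and that rte_mempool_mem_iter() could later be used to support several chunks. The minimal sketch below shows how a consumer could walk every chunk of a mempool using the same callback signature as mlx4_check_mempool_cb() in this patch. The kni_chunk/kni_collect_* names and the MAX_CHUNKS bound are hypothetical and do not exist in DPDK; this is a starting point, not a proposed implementation.

#include <string.h>
#include <rte_mempool.h>

#define MAX_CHUNKS 8 /* arbitrary bound, for the example only */

struct kni_chunk {
	void *addr;		/* virtual address of the chunk */
	phys_addr_t phys;	/* physical address of the chunk */
	size_t len;		/* length of the chunk in bytes */
};

struct kni_chunk_list {
	unsigned nb;
	struct kni_chunk chunk[MAX_CHUNKS];
};

/* Called once per memory chunk by rte_mempool_mem_iter(). */
static void
kni_collect_cb(struct rte_mempool *mp, void *opaque,
	       struct rte_mempool_memhdr *memhdr, unsigned mem_idx)
{
	struct kni_chunk_list *list = opaque;

	(void)mp;

	/* Skip chunks beyond what this simple example can hold. */
	if (mem_idx >= MAX_CHUNKS)
		return;

	list->chunk[mem_idx].addr = memhdr->addr;
	list->chunk[mem_idx].phys = memhdr->phys_addr;
	list->chunk[mem_idx].len = memhdr->len;
	list->nb = mem_idx + 1;
}

/* Fill 'list' with all memory chunks of 'mp', return how many were found. */
static unsigned
kni_collect_chunks(struct rte_mempool *mp, struct kni_chunk_list *list)
{
	memset(list, 0, sizeof(*list));
	rte_mempool_mem_iter(mp, kni_collect_cb, list);
	return list->nb;
}

A real multi-chunk KNI would then have to pass each (addr, phys, len) triple to the kernel module instead of the single mbuf_va/mbuf_phys pair used today, which is outside the scope of this change.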