From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id CCDEDAAA9 for ; Wed, 7 Mar 2018 17:57:29 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 07 Mar 2018 08:57:17 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.47,436,1515484800"; d="scan'208";a="32086350" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by FMSMGA003.fm.intel.com with ESMTP; 07 Mar 2018 08:57:13 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w27GvCGT032410; Wed, 7 Mar 2018 16:57:12 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w27GvCs6006770; Wed, 7 Mar 2018 16:57:12 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w27GvC2Z006766; Wed, 7 Mar 2018 16:57:12 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: Marcin Wojtas , Michal Krawczyk , Guy Tzalik , Evgeny Schemeilin , Bruce Richardson , keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com, thomas@monjalon.net, konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com, nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com, olivier.matz@6wind.com Date: Wed, 7 Mar 2018 16:56:50 +0000 Message-Id: <3d614cae4b7d1274d4be77da2d3e78559133a865.1520428025.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH v2 22/41] eal: replace memzone array with fbarray X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: 
DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 07 Mar 2018 16:57:30 -0000 It's there, so we might as well use it. Some operations will be sped up by that. Since we have to allocate an fbarray for memzones, we have to do it before we initialize the memory subsystem, because that, in secondary processes, will (later) allocate more fbarrays than the primary process, which will result in an inability to attach to the memzone fbarray if we do it after the fact. Signed-off-by: Anatoly Burakov --- Notes: Code for the ENA driver makes little sense to me, but I've attempted to keep the same semantics as the old code. drivers/net/ena/ena_ethdev.c | 10 +- lib/librte_eal/bsdapp/eal/eal.c | 6 + lib/librte_eal/common/eal_common_memzone.c | 180 +++++++++++++++------- lib/librte_eal/common/include/rte_eal_memconfig.h | 4 +- lib/librte_eal/common/malloc_heap.c | 4 + lib/librte_eal/linuxapp/eal/eal.c | 13 +- test/test/test_memzone.c | 9 +- 7 files changed, 157 insertions(+), 69 deletions(-) diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index 34b2a8d..f7bfc7a 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -264,11 +264,15 @@ static const struct eth_dev_ops ena_dev_ops = { static inline int ena_cpu_to_node(int cpu) { struct rte_config *config = rte_eal_get_configuration(); + struct rte_fbarray *arr = &config->mem_config->memzones; + const struct rte_memzone *mz; - if (likely(cpu < RTE_MAX_MEMZONE)) - return config->mem_config->memzone[cpu].socket_id; + if (unlikely(cpu >= RTE_MAX_MEMZONE)) + return NUMA_NO_NODE; - return NUMA_NO_NODE; + mz = rte_fbarray_get(arr, cpu); + + return mz->socket_id; } static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf, diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 45e5670..3b06e21 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -608,6 +608,12 @@
rte_eal_init(int argc, char **argv) return -1; } + if (rte_eal_malloc_heap_init() < 0) { + rte_eal_init_alert("Cannot init malloc heap\n"); + rte_errno = ENODEV; + return -1; + } + if (rte_eal_tailqs_init() < 0) { rte_eal_init_alert("Cannot init tail queues for objects\n"); rte_errno = EFAULT; diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 8c9aa28..a7cfdaf 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -28,42 +28,29 @@ static inline const struct rte_memzone * memzone_lookup_thread_unsafe(const char *name) { - const struct rte_mem_config *mcfg; + struct rte_mem_config *mcfg; + struct rte_fbarray *arr; const struct rte_memzone *mz; - unsigned i = 0; + int i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; /* * the algorithm is not optimal (linear), but there are few * zones and this function should be called at init only */ - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - mz = &mcfg->memzone[i]; - if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) - return &mcfg->memzone[i]; + while ((i = rte_fbarray_find_next_used(arr, i)) >= 0) { + mz = rte_fbarray_get(arr, i++); + if (mz->addr != NULL && + !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) + return mz; } return NULL; } -static inline struct rte_memzone * -get_next_free_memzone(void) -{ - struct rte_mem_config *mcfg; - unsigned i = 0; - - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; - - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr == NULL) - return &mcfg->memzone[i]; - } - - return NULL; -} /* This function will return the greatest free block if a heap has been * specified. 
If no heap has been specified, it will return the heap and @@ -103,13 +90,16 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, { struct rte_memzone *mz; struct rte_mem_config *mcfg; + struct rte_fbarray *arr; size_t requested_len; + int idx; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; /* no more room in config */ - if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) { + if (arr->count >= arr->len) { RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__); rte_errno = ENOSPC; return NULL; @@ -199,7 +189,14 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, struct malloc_elem *elem = malloc_elem_from_data(mz_addr); /* fill the zone in config */ - mz = get_next_free_memzone(); + idx = rte_fbarray_find_next_free(arr, 0); + + if (idx < 0) { + mz = NULL; + } else { + rte_fbarray_set_used(arr, idx); + mz = rte_fbarray_get(arr, idx); + } if (mz == NULL) { RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " @@ -209,7 +206,6 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } - mcfg->memzone_cnt++; snprintf(mz->name, sizeof(mz->name), "%s", name); mz->iova = rte_malloc_virt2iova(mz_addr); mz->addr = mz_addr; @@ -322,6 +318,8 @@ int rte_memzone_free(const struct rte_memzone *mz) { struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + struct rte_memzone *found_mz; int ret = 0; void *addr; unsigned idx; @@ -330,21 +328,26 @@ rte_memzone_free(const struct rte_memzone *mz) return -EINVAL; mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; rte_rwlock_write_lock(&mcfg->mlock); - idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); - idx = idx / sizeof(struct rte_memzone); + idx = rte_fbarray_find_idx(arr, mz); + found_mz = rte_fbarray_get(arr, idx); - addr = mcfg->memzone[idx].addr; - if (addr == NULL) + if (found_mz == NULL) { ret = -EINVAL; - else if (mcfg->memzone_cnt == 0) { - rte_panic("%s(): 
memzone address not NULL but memzone_cnt is 0!\n", - __func__); } else { - memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); - mcfg->memzone_cnt--; + addr = found_mz->addr; + if (addr == NULL) + ret = -EINVAL; + else if (arr->count == 0) { + rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", + __func__); + } else { + memset(found_mz, 0, sizeof(*found_mz)); + rte_fbarray_set_free(arr, idx); + } } rte_rwlock_write_unlock(&mcfg->mlock); @@ -378,25 +381,79 @@ rte_memzone_lookup(const char *name) void rte_memzone_dump(FILE *f) { + struct rte_fbarray *arr; struct rte_mem_config *mcfg; - unsigned i = 0; + int i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; rte_rwlock_read_lock(&mcfg->mlock); /* dump all zones */ - for (i=0; i<RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr == NULL) - break; - fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx" + while ((i = rte_fbarray_find_next_used(arr, i)) >= 0) { + void *cur_addr, *mz_end; + struct rte_memzone *mz; + struct rte_memseg_list *msl = NULL; + struct rte_memseg *ms; + int ms_idx; + + mz = rte_fbarray_get(arr, i); + + /* + * memzones can span multiple physical pages, so dump addresses + * of all physical pages this memzone spans. + */ + + fprintf(f, "Zone %u: name:<%s>, len:0x%zx" ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, - mcfg->memzone[i].name, - mcfg->memzone[i].iova, - mcfg->memzone[i].len, - mcfg->memzone[i].addr, - mcfg->memzone[i].socket_id, - mcfg->memzone[i].flags); + mz->name, + mz->len, + mz->addr, + mz->socket_id, + mz->flags); + + msl = rte_mem_virt2memseg_list(mz->addr); + if (!msl) { + RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n"); + continue; + } + + cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, mz->hugepage_sz); + mz_end = RTE_PTR_ADD(cur_addr, mz->len); + + fprintf(f, "physical segments used:\n"); + if (msl->base_va == NULL) { + /* if memseg list base VA, we're in legacy mem mode, + * which means we have only one memseg.
+ */ + ms = rte_mem_virt2memseg(mz->addr, msl); + + fprintf(f, " addr: %p iova: 0x%" PRIx64 " " + "len: 0x%" PRIx64 " " + "pagesz: 0x%" PRIx64 "\n", + cur_addr, ms->iova, ms->len, ms->hugepage_sz); + } else { + ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / + msl->hugepage_sz; + ms = rte_fbarray_get(&msl->memseg_arr, ms_idx); + + do { + fprintf(f, " addr: %p iova: 0x%" PRIx64 " " + "len: 0x%" PRIx64 " " + "pagesz: 0x%" PRIx64 "\n", + cur_addr, ms->iova, ms->len, + ms->hugepage_sz); + + /* advance VA to next page */ + cur_addr = RTE_PTR_ADD(cur_addr, + ms->hugepage_sz); + + /* memzones occupy contiguous segments */ + ++ms; + } while (cur_addr < mz_end); + } + i++; } rte_rwlock_read_unlock(&mcfg->mlock); } @@ -412,19 +469,23 @@ rte_eal_memzone_init(void) /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; - /* secondary processes don't need to initialise anything */ - if (rte_eal_process_type() == RTE_PROC_SECONDARY) - return 0; - rte_rwlock_write_lock(&mcfg->mlock); - /* delete all zones */ - mcfg->memzone_cnt = 0; - memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); + if (rte_eal_process_type() == RTE_PROC_PRIMARY && + rte_fbarray_init(&mcfg->memzones, "memzone", + RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) { + RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n"); + return -1; + } else if (rte_eal_process_type() == RTE_PROC_SECONDARY && + rte_fbarray_attach(&mcfg->memzones)) { + RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n"); + rte_rwlock_write_unlock(&mcfg->mlock); + return -1; + } rte_rwlock_write_unlock(&mcfg->mlock); - return rte_eal_malloc_heap_init(); + return 0; } /* Walk all reserved memory zones */ @@ -432,14 +493,19 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), void *arg) { struct rte_mem_config *mcfg; - unsigned i; + struct rte_fbarray *arr; + int i; mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; + + i = 0; rte_rwlock_read_lock(&mcfg->mlock); - for (i=0; 
i<RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr != NULL) - (*func)(&mcfg->memzone[i], arg); + while ((i = rte_fbarray_find_next_used(arr, i)) > 0) { + struct rte_memzone *mz = rte_fbarray_get(arr, i); + (*func)(mz, arg); + i++; } rte_rwlock_read_unlock(&mcfg->mlock); } diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index 31fc8e7..b6bdb21 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -58,10 +58,8 @@ struct rte_mem_config { rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */ rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */ - uint32_t memzone_cnt; /**< Number of allocated memzones */ - /* memory segments and zones */ - struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ + struct rte_fbarray memzones; /**< Memzone descriptors. */ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS]; /**< list of dynamic arrays holding memsegs */ diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 984e027..7a3d0f3 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -579,6 +579,10 @@ rte_eal_malloc_heap_init(void) if (mcfg == NULL) return -1; + /* secondary processes don't need to initialize heap */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { int start; struct rte_fbarray *arr; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 7851a7d..d336c96 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -857,6 +857,15 @@ rte_eal_init(int argc, char **argv) return -1; } #endif + /* memzone_init maps rte_fbarrays, which has to be done before hugepage + * init/attach, because attach creates extra fbarrays in secondary + * process, resulting in inability to map memzone fbarray.
+ */ + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } if (rte_eal_memory_init() < 0) { rte_eal_init_alert("Cannot init memory\n"); @@ -867,8 +876,8 @@ rte_eal_init(int argc, char **argv) /* the directories are locked during eal_hugepage_info_init */ eal_hugedirs_unlock(); - if (rte_eal_memzone_init() < 0) { - rte_eal_init_alert("Cannot init memzone\n"); + if (rte_eal_malloc_heap_init() < 0) { + rte_eal_init_alert("Cannot init malloc heap\n"); rte_errno = ENODEV; return -1; } diff --git a/test/test/test_memzone.c b/test/test/test_memzone.c index 47f4de8..4b49d61 100644 --- a/test/test/test_memzone.c +++ b/test/test/test_memzone.c @@ -893,7 +893,7 @@ test_memzone_basic(void) const struct rte_memzone *mz; int memzone_cnt_after, memzone_cnt_expected; int memzone_cnt_before = - rte_eal_get_configuration()->mem_config->memzone_cnt; + rte_eal_get_configuration()->mem_config->memzones.count; memzone1 = rte_memzone_reserve(TEST_MEMZONE_NAME("testzone1"), 100, SOCKET_ID_ANY, 0); @@ -917,7 +917,7 @@ test_memzone_basic(void) (memzone3 != NULL) + (memzone4 != NULL); memzone_cnt_after = - rte_eal_get_configuration()->mem_config->memzone_cnt; + rte_eal_get_configuration()->mem_config->memzones.count; if (memzone_cnt_after != memzone_cnt_expected) return -1; @@ -996,7 +996,7 @@ test_memzone_basic(void) } memzone_cnt_after = - rte_eal_get_configuration()->mem_config->memzone_cnt; + rte_eal_get_configuration()->mem_config->memzones.count; if (memzone_cnt_after != memzone_cnt_before) return -1; @@ -1017,7 +1017,8 @@ static int test_memzone(void) { /* take note of how many memzones were allocated before running */ - int memzone_cnt = rte_eal_get_configuration()->mem_config->memzone_cnt; + int memzone_cnt = + rte_eal_get_configuration()->mem_config->memzones.count; printf("test basic memzone API\n"); if (test_memzone_basic() < 0) -- 2.7.4