From mboxrd@z Thu Jan 1 00:00:00 1970
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Date: Tue, 12 Jun 2018 10:46:15 +0100
Message-Id: <311a48e50a626b7a6e8a80b0da79e6c725b79b52.1528796062.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 1.7.0.7
Subject: [dpdk-dev] [PATCH 2/3] mem: provide thread-unsafe memseg walk variant

Sometimes, user code needs to walk the memseg list while inside a
memory-related callback. Rather than making everyone copy around the
same iteration code and depend on DPDK internals, provide an official
way to do a memseg walk from inside callbacks.

Also, remove the existing reimplementation from the sPAPR VFIO code and
use the new API instead.
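
For illustration, a rough usage sketch (not part of this patch; the
sum_memseg_len/total_memseg_len names are made up) of how user code
could total up memseg lengths from within a memory event callback,
where the hotplug lock is already held and the locking walk would
deadlock:

#include <rte_memory.h>

static int
sum_memseg_len(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg)
{
	size_t *total = arg;

	(void)msl; /* memseg list not needed here */
	*total += ms->len;

	/* 0 continues the walk; >0 stops it; <0 reports an error */
	return 0;
}

static size_t
total_memseg_len(void)
{
	size_t total = 0;

	/* called from within a memory event callback, so use the
	 * thread-unsafe walk variant
	 */
	if (rte_memseg_walk_thread_unsafe(sum_memseg_len, &total) < 0)
		return 0;
	return total;
}

The callback follows the usual rte_memseg_walk_t prototype; only the
walk entry point changes.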
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_memory.c  | 28 ++++++++------
 lib/librte_eal/common/include/rte_memory.h | 18 +++++++++
 lib/librte_eal/linuxapp/eal/eal_vfio.c     | 43 +++-------------------
 lib/librte_eal/rte_eal_version.map         |  1 +
 4 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index e3320a746..afe0d5b57 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -841,14 +841,11 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
 }
 
 int __rte_experimental
-rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	int i, ms_idx, ret = 0;
 
-	/* do not allow allocations/frees/init while we iterate */
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
 		const struct rte_memseg *ms;
@@ -863,18 +860,25 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
 		while (ms_idx >= 0) {
 			ms = rte_fbarray_get(arr, ms_idx);
 			ret = func(msl, ms, arg);
-			if (ret < 0) {
-				ret = -1;
-				goto out;
-			} else if (ret > 0) {
-				ret = 1;
-				goto out;
-			}
+			if (ret)
+				return ret;
 			ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
 		}
 	}
-out:
+	return 0;
+}
+
+int __rte_experimental
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int ret = 0;
+
+	/* do not allow allocations/frees/init while we iterate */
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	ret = rte_memseg_walk_thread_unsafe(func, arg);
 	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
 	return ret;
 }
 
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index aeba38bfa..c5a84c333 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -263,6 +263,24 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
 int __rte_experimental
 rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
 
+/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ *   from within memory-related callback functions.
+ *
+ * @param func
+ *   Iterator function
+ * @param arg
+ *   Argument passed to iterator
+ * @return
+ *   0 if walked over the entire list
+ *   1 if stopped by the user
+ *   -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
 /**
  * Walk each VA-contiguous area without performing any locking.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index a2bbdfbf4..14c9332e9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -87,42 +87,6 @@ static const struct vfio_iommu_type iommu_types[] = {
 	},
 };
 
-/* for sPAPR IOMMU, we will need to walk memseg list, but we cannot use
- * rte_memseg_walk() because by the time we enter callback we will be holding a
- * write lock, so regular rte-memseg_walk will deadlock. copying the same
- * iteration code everywhere is not ideal as well. so, use a lockless copy of
- * memseg walk here.
- */
-static int
-memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
-{
-	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	int i, ms_idx, ret = 0;
-
-	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
-		struct rte_memseg_list *msl = &mcfg->memsegs[i];
-		const struct rte_memseg *ms;
-		struct rte_fbarray *arr;
-
-		if (msl->memseg_arr.count == 0)
-			continue;
-
-		arr = &msl->memseg_arr;
-
-		ms_idx = rte_fbarray_find_next_used(arr, 0);
-		while (ms_idx >= 0) {
-			ms = rte_fbarray_get(arr, ms_idx);
-			ret = func(msl, ms, arg);
-			if (ret < 0)
-				return -1;
-			if (ret > 0)
-				return 1;
-			ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
-		}
-	}
-	return 0;
-}
-
 static int
 is_null_map(const struct user_mem_map *map)
 {
@@ -1357,7 +1321,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 
 		/* check if window size needs to be adjusted */
 		memset(&param, 0, sizeof(param));
-		if (memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+		/* we're inside a callback so use thread-unsafe version */
+		if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
 				&param) < 0) {
 			RTE_LOG(ERR, EAL, "Could not get window size\n");
 			ret = -1;
@@ -1386,7 +1351,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				ret = -1;
 				goto out;
 			}
-			if (memseg_walk_thread_unsafe(vfio_spapr_map_walk,
+			/* we're inside a callback, so use thread-unsafe version
+			 */
+			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
 				&vfio_container_fd) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 98bfbe796..72d32fc39 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -289,6 +289,7 @@ EXPERIMENTAL {
 	rte_memseg_contig_walk_thread_unsafe;
 	rte_memseg_list_walk;
 	rte_memseg_walk;
+	rte_memseg_walk_thread_unsafe;
 	rte_mp_action_register;
 	rte_mp_action_unregister;
 	rte_mp_reply;
-- 
2.17.1