From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path:
Received: from mga18.intel.com (mga18.intel.com [134.134.136.126])
 by dpdk.org (Postfix) with ESMTP id 72EE65B20
 for ; Sat, 3 Mar 2018 14:46:38 +0100 (CET)
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from fmsmga003.fm.intel.com ([10.253.24.29])
 by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;
 03 Mar 2018 05:46:37 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.47,418,1515484800"; d="scan'208";a="31152833"
Received: from irvmail001.ir.intel.com ([163.33.26.43])
 by FMSMGA003.fm.intel.com with ESMTP; 03 Mar 2018 05:46:34 -0800
Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45])
 by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w23DkYao012246;
 Sat, 3 Mar 2018 13:46:34 GMT
Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1])
 by sivswdev01.ir.intel.com with ESMTP id w23DkYO6023877;
 Sat, 3 Mar 2018 13:46:34 GMT
Received: (from aburakov@localhost)
 by sivswdev01.ir.intel.com with LOCAL id w23DkYi6023873;
 Sat, 3 Mar 2018 13:46:34 GMT
From: Anatoly Burakov
To: dev@dpdk.org
Cc: keith.wiles@intel.com, jianfeng.tan@intel.com, andras.kovacs@ericsson.com,
 laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,
 bruce.richardson@intel.com, thomas@monjalon.net, konstantin.ananyev@intel.com,
 kuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com,
 nelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch,
 jerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com, olivier.matz@6wind.com
Date: Sat, 3 Mar 2018 13:46:18 +0000
Message-Id:
X-Mailer: git-send-email 1.7.0.7
In-Reply-To:
References:
Subject: [dpdk-dev] [PATCH 30/41] eal: enable callbacks on malloc/free and mp sync
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
X-List-Received-Date: Sat, 03 Mar 2018 13:46:38 -0000

Also, rewrite VFIO to rely on memory callbacks instead of manually
registering memory with VFIO. Callbacks will only be registered if
VFIO is enabled.

Signed-off-by: Anatoly Burakov
---
 lib/librte_eal/common/malloc_heap.c        | 21 +++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 37 +++++++++++++++++++++---------
 lib/librte_eal/linuxapp/eal/eal_vfio.c     | 35 ++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 11 deletions(-)
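As a usage sketch for reviewers (illustrative only, not part of this patch):
an application can subscribe to these events through the registration API
added earlier in this series. The header location, the callback name and the
printf handler below are assumptions.

#include <stdio.h>

#include <rte_memory.h>	/* assumed home of the mem event API */

/* called on every change in DPDK's memory map */
static void
app_mem_event_cb(enum rte_mem_event type, const void *addr, size_t len)
{
	printf("mem event: %s, addr %p, len %zu\n",
		type == RTE_MEM_EVENT_ALLOC ? "alloc" : "free",
		addr, len);
}

static void
app_register_mem_events(void)
{
	/* callback names must be unique, cf. "vfio_mem_event_clb" below */
	rte_mem_event_register_callback("app_mem_event_clb",
			app_mem_event_cb);
}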
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 9109555..9d055c8 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -223,6 +223,7 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 	void *map_addr;
 	size_t map_len;
 	int n_pages;
+	bool callback_triggered = false;

 	map_len = RTE_ALIGN_CEIL(align + elt_size + MALLOC_ELEM_TRAILER_LEN,
 			pg_sz);
@@ -242,14 +243,25 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,

 	map_addr = ms[0]->addr;

+	/* notify user about changes in memory map */
+	eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, map_addr, map_len);
+
 	/* notify other processes that this has happened */
 	if (request_sync()) {
 		/* we couldn't ensure all processes have mapped memory,
 		 * so free it back and notify everyone that it's been
 		 * freed back.
+		 *
+		 * technically, we could've avoided adding memory addresses to
+		 * the map, but that would've led to inconsistent behavior
+		 * between primary and secondary processes, as those get
+		 * callbacks during sync. therefore, force the primary process
+		 * to do alloc-and-rollback syncs as well.
 		 */
+		callback_triggered = true;
 		goto free_elem;
 	}
+
 	heap->total_size += map_len;

 	RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
@@ -260,6 +272,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 	return 0;

 free_elem:
+	if (callback_triggered)
+		eal_memalloc_notify(RTE_MEM_EVENT_FREE, map_addr, map_len);
+
 	rollback_expand_heap(ms, n_pages, elem, map_addr, map_len);

 	request_sync();
@@ -615,6 +630,10 @@ malloc_heap_free(struct malloc_elem *elem)
 	heap->total_size -= n_pages * msl->hugepage_sz;

 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* notify user about changes in memory map */
+		eal_memalloc_notify(RTE_MEM_EVENT_FREE,
+				aligned_start, aligned_len);
+
 		/* don't care if any of this fails */
 		malloc_heap_free_pages(aligned_start, aligned_len);

@@ -637,6 +656,8 @@ malloc_heap_free(struct malloc_elem *elem)
 	 * already removed from the heap, so it is, for all intents and
 	 * purposes, hidden from the rest of DPDK even if some other
 	 * process (including this one) may have these pages mapped.
+	 *
+	 * notifications about deallocated memory happen during sync.
 	 */
 	request_to_primary(&req);
 }
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 227d703..1008fae 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -34,7 +34,6 @@
 #include
 #include
 #include
-#include

 #include "eal_filesystem.h"
 #include "eal_internal_cfg.h"
@@ -480,10 +479,6 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	ms->iova = iova;
 	ms->socket_id = socket_id;
-	/* map the segment so that VFIO has access to it */
-	if (rte_eal_iova_mode() == RTE_IOVA_VA &&
-			rte_vfio_dma_map(ms->addr_64, iova, size))
-		RTE_LOG(DEBUG, EAL, "Cannot register segment with VFIO\n");

 	return 0;

 mapped:
@@ -515,12 +510,6 @@ free_page(struct rte_memseg *ms, struct hugepage_info *hi,
 	char path[PATH_MAX];
 	int fd, ret;

-	/* unmap the segment from VFIO */
-	if (rte_eal_iova_mode() == RTE_IOVA_VA &&
-			rte_vfio_dma_unmap(ms->addr_64, ms->iova, ms->len)) {
-		RTE_LOG(DEBUG, EAL, "Cannot unregister segment with VFIO\n");
-	}
-
 	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
 				MAP_FAILED) {
@@ -808,6 +797,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,

 	diff_len = RTE_MIN(chunk_len, diff_len);

+	/* if we are freeing memory, notify the application */
+	if (!used) {
+		struct rte_memseg *ms;
+		void *start_va;
+		size_t len;
+
+		ms = rte_fbarray_get(l_arr, start);
+		start_va = ms->addr;
+		len = ms->len * diff_len;
+
+		eal_memalloc_notify(RTE_MEM_EVENT_FREE, start_va, len);
+	}
+
 	for (i = 0; i < diff_len; i++) {
 		struct rte_memseg *p_ms, *l_ms;
 		int seg_idx = start + i;
@@ -834,6 +836,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
 		}
 	}

+	/* if we just allocated memory, notify the application */
+	if (used) {
+		struct rte_memseg *ms;
+		void *start_va;
+		size_t len;
+
+		ms = rte_fbarray_get(l_arr, start);
+		start_va = ms->addr;
+		len = ms->len * diff_len;
+
+		eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, start_va, len);
+	}
+
 	/* calculate how much we can advance until next chunk */
 	diff_len = used ?
 			rte_fbarray_find_contig_used(l_arr, start) :
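Taken together, the two files above give callback consumers a consistent
ordering in both process types: RTE_MEM_EVENT_ALLOC is delivered only after
the new pages are mapped, while RTE_MEM_EVENT_FREE is delivered while the
pages are still mapped, just before they are released. A hypothetical
accounting callback (illustrative only, not part of this patch; the header
location is an assumption) can therefore inspect the affected range on free
and rely on alloc/free events balancing out:

#include <stddef.h>

#include <rte_memory.h>	/* assumed location of enum rte_mem_event */

static size_t tracked_bytes;	/* net DPDK memory observed via events */

static void
accounting_cb(enum rte_mem_event type, const void *addr, size_t len)
{
	/* ALLOC arrives once [addr, addr + len) is mapped and usable;
	 * FREE arrives while the range is still mapped, so it may be
	 * inspected one last time before it disappears.
	 */
	if (type == RTE_MEM_EVENT_ALLOC)
		tracked_bytes += len;
	else
		tracked_bytes -= len;
	(void)addr;
}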
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 8fe8984..d3c3b70 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -214,6 +214,37 @@ vfio_group_device_count(int vfio_group_fd)
 	return vfio_cfg.vfio_groups[i].devices;
 }

+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0;
+	uint64_t pgsz;
+
+	msl = rte_mem_virt2memseg_list(addr);
+	pgsz = msl->hugepage_sz;
+
+	while (cur_len < len) {
+		const void *va = RTE_PTR_ADD(addr, cur_len);
+		uint64_t vfio_va, iova;
+
+		ms = rte_mem_virt2memseg(va, msl);
+		vfio_va = (uint64_t) (uintptr_t) va;
+		iova = ms->iova;
+
+		/* this never gets called in legacy mode, so we can be sure
+		 * that each segment is a single page.
+		 */
+		if (type == RTE_MEM_EVENT_ALLOC)
+			rte_vfio_dma_map(vfio_va, iova, pgsz);
+		else
+			rte_vfio_dma_unmap(vfio_va, iova, pgsz);
+
+		cur_len += pgsz;
+	}
+}
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
@@ -507,6 +538,10 @@ rte_vfio_enable(const char *modname)
 	if (vfio_cfg.vfio_container_fd != -1) {
 		RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
 		vfio_cfg.vfio_enabled = 1;
+
+		/* register callback for mem events */
+		rte_mem_event_register_callback("vfio_mem_event_clb",
+				vfio_mem_event_callback);
 	} else {
 		RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
 	}
--
2.7.4
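The page-by-page walk used by vfio_mem_event_callback above generalizes to
any subsystem that must (de)register DMA memory as the map changes. A
hypothetical sketch follows; my_dev_dma_map()/my_dev_dma_unmap() are
stand-ins for a driver's own primitives (not DPDK APIs), and the header
locations are assumptions:

#include <stdint.h>
#include <stddef.h>

#include <rte_common.h>	/* RTE_PTR_ADD */
#include <rte_memory.h>	/* memseg lookup API, assumed location */

extern int my_dev_dma_map(uint64_t va, uint64_t iova, uint64_t len);
extern int my_dev_dma_unmap(uint64_t va, uint64_t iova, uint64_t len);

static void
my_dev_mem_event_cb(enum rte_mem_event type, const void *addr, size_t len)
{
	struct rte_memseg_list *msl = rte_mem_virt2memseg_list(addr);
	uint64_t pgsz = msl->hugepage_sz;
	size_t cur_len = 0;

	/* walk the affected range one page at a time, as VFIO does */
	while (cur_len < len) {
		const void *va = RTE_PTR_ADD(addr, cur_len);
		const struct rte_memseg *ms = rte_mem_virt2memseg(va, msl);

		if (type == RTE_MEM_EVENT_ALLOC)
			my_dev_dma_map((uint64_t)(uintptr_t)va, ms->iova, pgsz);
		else
			my_dev_dma_unmap((uint64_t)(uintptr_t)va, ms->iova, pgsz);

		cur_len += pgsz;
	}
}

As in the VFIO case, this relies on the callback never firing in legacy
mode, so each memseg in the range is exactly one page of msl->hugepage_sz.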