From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id A9A08A04F1; Fri, 13 Dec 2019 15:13:44 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 7CC8E1BF94; Fri, 13 Dec 2019 15:13:39 +0100 (CET) Received: from us-smtp-delivery-1.mimecast.com (us-smtp-1.mimecast.com [207.211.31.81]) by dpdk.org (Postfix) with ESMTP id 3E5D21BF8D for ; Fri, 13 Dec 2019 15:13:37 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1576246416; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=eChFg4knKKoLw7SWxulKYvQhlvXP+awAtpzYacZCbQs=; b=HY8QI8k5grcS0q4KUUWIXcuuO+W7LaF5NYBDbBp3FZsP5EQBOzHPNc12zLbtEj7ZKZX0fK HmMx659pJX5AjPObne0vK55m5Mc9v7vbrseR5e3OcYw/CC+E7wIRmZISwrv678Pru4CAaG lfoVPreseMZZnmzwgg2Q5aiSW/GFetw= Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-209-km5tr3IuOF6bxrHw-lUOxw-1; Fri, 13 Dec 2019 09:13:33 -0500 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id 22849CF98F; Fri, 13 Dec 2019 14:13:32 +0000 (UTC) Received: from localhost.localdomain (ovpn-112-19.ams2.redhat.com [10.36.112.19]) by smtp.corp.redhat.com (Postfix) with ESMTP id 450BE60C05; Fri, 13 Dec 2019 14:13:29 +0000 (UTC) From: Maxime Coquelin To: dev@dpdk.org, tiwei.bie@intel.com, zhihong.wang@intel.com, anatoly.burakov@intel.com Cc: Maxime Coquelin Date: Fri, 13 Dec 2019 15:13:19 +0100 Message-Id: <20191213141322.32730-2-maxime.coquelin@redhat.com> In-Reply-To: 
<20191213141322.32730-1-maxime.coquelin@redhat.com> References: <20191213141322.32730-1-maxime.coquelin@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.12 X-MC-Unique: km5tr3IuOF6bxrHw-lUOxw-1 X-Mimecast-Spam-Score: 0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Subject: [dpdk-dev] [PATCH 1/4] eal: add new API to register contiguous external memory X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This new API allows to pass a file descriptor while registering external and contiguous memory in the IOVA space. This is required for using Virtio-user PMD with application using external memory for the mbuf's buffers, like Seastar or VPP. FD is only attached to the segments if single_file_segment option is enabled. Signed-off-by: Maxime Coquelin --- lib/librte_eal/common/eal_common_memory.c | 75 +++++++++++++++++++++- lib/librte_eal/common/include/rte_memory.h | 46 +++++++++++++ lib/librte_eal/common/malloc_heap.c | 17 ++++- lib/librte_eal/common/malloc_heap.h | 2 +- lib/librte_eal/common/rte_malloc.c | 2 +- lib/librte_eal/rte_eal_version.map | 3 + 6 files changed, 141 insertions(+), 4 deletions(-) diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/com= mon/eal_common_memory.c index 4a9cc1f19a..7a4b371828 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -772,6 +772,79 @@ rte_memseg_get_fd_offset(const struct rte_memseg *ms, = size_t *offset) =09return ret; } =20 +int +rte_extmem_register_contig(void *va_addr, size_t len, rte_iova_t iova_addr= , +=09=09size_t page_sz, int fd) +{ +=09struct rte_mem_config *mcfg =3D rte_eal_get_configuration()->mem_config= ; +=09rte_iova_t *iova_addrs =3D NULL; +=09unsigned int socket_id, n, i; +=09int ret =3D 0; + +=09if 
(va_addr =3D=3D NULL || page_sz =3D=3D 0 || len =3D=3D 0 || +=09=09=09!rte_is_power_of_2(page_sz) || +=09=09=09RTE_ALIGN(len, page_sz) !=3D len || +=09=09=09((len % page_sz) !=3D 0 || +=09=09=09!rte_is_aligned(va_addr, page_sz))) { +=09=09rte_errno =3D EINVAL; +=09=09return -1; +=09} + +=09n =3D len / page_sz; +=09if (iova_addr !=3D 0) { +=09=09iova_addrs =3D malloc(n * sizeof(*iova_addrs)); +=09=09if (iova_addrs =3D=3D NULL) { +=09=09=09rte_errno =3D -ENOMEM; +=09=09=09return -1; +=09=09} + +=09=09for (i =3D 0; i < n; i++) +=09=09=09iova_addrs[i] =3D iova_addr + n * page_sz; + +=09} + + +=09if (fd >=3D 0 && !internal_config.single_file_segments) { +=09=09RTE_LOG(INFO, EAL, "FD won't be attached to the external memory," \ +=09=09=09=09" requires single file segments\n"); +=09=09fd =3D -1; +=09} +=09rte_mcfg_mem_write_lock(); + +=09/* make sure the segment doesn't already exist */ +=09if (malloc_heap_find_external_seg(va_addr, len) !=3D NULL) { +=09=09rte_errno =3D EEXIST; +=09=09ret =3D -1; +=09=09goto unlock; +=09} + +=09/* get next available socket ID */ +=09socket_id =3D mcfg->next_socket_id; +=09if (socket_id > INT32_MAX) { +=09=09RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); +=09=09rte_errno =3D ENOSPC; +=09=09ret =3D -1; +=09=09goto unlock; +=09} + +=09/* we can create a new memseg */ +=09if (malloc_heap_create_external_seg(va_addr, iova_addrs, n, +=09=09=09page_sz, "extmem_contig", socket_id, fd) =3D=3D NULL) { +=09=09ret =3D -1; +=09=09goto unlock; +=09} + +=09/* memseg list successfully created - increment next socket ID */ +=09mcfg->next_socket_id++; +unlock: +=09rte_mcfg_mem_write_unlock(); + +=09if (iova_addrs !=3D NULL) +=09=09free(iova_addrs); + +=09return ret; +} + int rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], =09=09unsigned int n_pages, size_t page_sz) @@ -809,7 +882,7 @@ rte_extmem_register(void *va_addr, size_t len, rte_iova= _t iova_addrs[], =09/* we can create a new memseg */ =09n =3D len / page_sz; =09if 
(malloc_heap_create_external_seg(va_addr, iova_addrs, n, -=09=09=09page_sz, "extmem", socket_id) =3D=3D NULL) { +=09=09=09page_sz, "extmem", socket_id, -1) =3D=3D NULL) { =09=09ret =3D -1; =09=09goto unlock; =09} diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/co= mmon/include/rte_memory.h index 3d8d0bd697..e274e47e5e 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -451,6 +451,10 @@ rte_memseg_get_fd_offset_thread_unsafe(const struct rt= e_memseg *ms, * is NULL. * @param page_sz * Page size of the underlying memory + * @param fd + * File descriptor for the external memory region registered. Must be se= t to + * -1 if no FD, and ignored if single-segment isn't used or if iova + * aren't contiguous (iova_addrs !=3D NULL). * * @return * - 0 on success @@ -461,6 +465,48 @@ rte_memseg_get_fd_offset_thread_unsafe(const struct rt= e_memseg *ms, */ __rte_experimental int +rte_extmem_register_contig(void *va_addr, size_t len, rte_iova_t iova_addr= , +=09=09size_t page_sz, int fd); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register external contiguous memory chunk with DPDK. + * + * @note Using this API is mutually exclusive with ``rte_malloc`` family o= f + * API's. + * + * @note This API will not perform any DMA mapping. It is expected that us= er + * will do that themselves. + * + * @note Before accessing this memory in other processes, it needs to be + * attached in each of those processes by calling ``rte_extmem_attach`` = in + * each other process. + * + * @param va_addr + * Start of virtual area to register. Must be aligned by ``page_sz``. + * @param len + * Length of virtual area to register. Must be aligned by ``page_sz``. + * @param iova_addr + * IOVA address for the contiguous memory chunk. Can be 0, in which cas= e + * page IOVA addresses will be set to RTE_BAD_IOVA. 
+ * @param page_sz + * Page size of the underlying memory + * @param fd + * File descriptor for the external memory region registered. Must be se= t to + * -1 if no FD, and ignored if single-segment isn't used. + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * EEXIST - memory chunk is already registered + * ENOSPC - no more space in internal config to store a new memory chu= nk + * ENOMEM - failed to allocate pages IOVA addresses + */ +__rte_experimental +int rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], =09=09unsigned int n_pages, size_t page_sz); =20 diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/ma= lloc_heap.c index 842eb9de75..229ab6563c 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -1096,7 +1096,7 @@ destroy_elem(struct malloc_elem *elem, size_t len) struct rte_memseg_list * malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], =09=09unsigned int n_pages, size_t page_sz, const char *seg_name, -=09=09unsigned int socket_id) +=09=09unsigned int socket_id, int fd) { =09struct rte_mem_config *mcfg =3D rte_eal_get_configuration()->mem_config= ; =09char fbarray_name[RTE_FBARRAY_NAME_LEN]; @@ -1153,6 +1153,13 @@ malloc_heap_create_external_seg(void *va_addr, rte_i= ova_t iova_addrs[], =09msl->version =3D 0; =09msl->external =3D 1; =20 +=09if (fd >=3D 0) { +=09=09int list_idx =3D msl - mcfg->memsegs; + +=09=09if (eal_memalloc_set_seg_list_fd(list_idx, fd)) +=09=09=09RTE_LOG(ERR, EAL, "Failed to set segment list FD\n"); +=09} + =09return msl; } =20 @@ -1202,10 +1209,18 @@ malloc_heap_find_external_seg(void *va_addr, size_t= len) int malloc_heap_destroy_external_seg(struct rte_memseg_list *msl) { +=09struct rte_mem_config *mcfg =3D rte_eal_get_configuration()->mem_config= ; +=09int list_idx; + =09/* destroy the fbarray backing this memory */ 
=09if (rte_fbarray_destroy(&msl->memseg_arr) < 0) =09=09return -1; =20 +=09list_idx =3D msl - mcfg->memsegs; +=09if (eal_memalloc_set_seg_list_fd(list_idx, -1)) +=09=09RTE_LOG(ERR, EAL, "Failed to reset segment list FD\n"); + + =09/* reset the memseg list */ =09memset(msl, 0, sizeof(*msl)); =20 diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/ma= lloc_heap.h index 772736b53f..438ce908de 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -65,7 +65,7 @@ malloc_heap_destroy(struct malloc_heap *heap); struct rte_memseg_list * malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], =09=09unsigned int n_pages, size_t page_sz, const char *seg_name, -=09=09unsigned int socket_id); +=09=09unsigned int socket_id, int fd); =20 struct rte_memseg_list * malloc_heap_find_external_seg(void *va_addr, size_t len); diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte= _malloc.c index d6026a2b17..aa19b0517f 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -389,7 +389,7 @@ rte_malloc_heap_memory_add(const char *heap_name, void = *va_addr, size_t len, =09n =3D len / page_sz; =20 =09msl =3D malloc_heap_create_external_seg(va_addr, iova_addrs, n, page_sz= , -=09=09=09heap_name, heap->socket_id); +=09=09=09heap_name, heap->socket_id, -1); =09if (msl =3D=3D NULL) { =09=09ret =3D -1; =09=09goto unlock; diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_ve= rsion.map index e38d02530c..ddcb4b0512 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -332,4 +332,7 @@ EXPERIMENTAL { =09# added in 19.11 =09rte_log_get_stream; =09rte_mcfg_get_single_file_segments; + +=09# added in 20.02 +=09rte_extmem_register_contig; }; --=20 2.21.0