From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id AF83CA0093 for ; Wed, 9 Mar 2022 10:02:02 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 9D32840395; Wed, 9 Mar 2022 10:02:02 +0100 (CET) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by mails.dpdk.org (Postfix) with ESMTP id 725DE40395 for ; Wed, 9 Mar 2022 10:02:01 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1646816521; x=1678352521; h=from:to:cc:subject:date:message-id; bh=zLEpxDesxe0dXsw9Jetx0h9+17rrhlEF/SFmjvukE6Q=; b=jvi64yz1ZIZsbTxj+kik0HfaJfd5LUIxkWr0/pKKKbJBGSQZo1eyC8Nj o1/MMXA59jYt4c4VtUxP8pSrQO+rFjmaCyAso4Hre601beoAu06Zm9K6J uvYE5wYvab4XqUIXNWGNueBdg1QC7dzKcdtIkBukcd1zy7N45T8z4S26F HD4ykg9sx6Z1Yl5pqK8/j4Nofvhk7aehXPrVj9g3ulvw0Sk5JfTJIM/zA wIjo8ON66km/M8OPwcc5WFiBsbhOeD5655yP9/ZyO+buatkCcd1Zdz1xJ 6DV7o3jCq6psNI8lC24AWZptUtYoz7Wz3MP3LcsYsLoUHp25TelutErvs g==; X-IronPort-AV: E=McAfee;i="6200,9189,10280"; a="235534240" X-IronPort-AV: E=Sophos;i="5.90,167,1643702400"; d="scan'208";a="235534240" Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 09 Mar 2022 01:02:00 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.90,167,1643702400"; d="scan'208";a="611299916" Received: from npg-dpdk-xuan-cbdma.sh.intel.com ([10.67.110.228]) by fmsmga004.fm.intel.com with ESMTP; 09 Mar 2022 01:01:57 -0800 From: xuan.ding@intel.com To: ktraynor@redhat.com, stable@dpdk.org Cc: chenbo.xia@intel.com, maxime.coquelin@redhat.com, Xuan Ding , Yuan Wang Subject: [PATCH 21.11] vhost: fix physical address mapping Date: Wed, 9 Mar 2022 09:00:04 +0000 Message-Id: <20220309090004.18159-1-xuan.ding@intel.com> X-Mailer: git-send-email 2.17.1 X-BeenThere: stable@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list 
List-Id: patches for DPDK stable branches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: stable-bounces@dpdk.org From: Xuan Ding [ upstream commit 52ade97e36413ba0dbfbaca03adc5d17d3b770bb ] In IOVA as PA mode, IOVA is likely to be discontiguous, so DMA devices require page-by-page mapping. For consistency, this patch implements page-by-page mapping, instead of mapping at region granularity, for both IOVA as VA and IOVA as PA modes. Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost") Cc: stable@dpdk.org Signed-off-by: Xuan Ding Signed-off-by: Yuan Wang Reviewed-by: Maxime Coquelin --- lib/vhost/vhost.h | 1 + lib/vhost/vhost_user.c | 115 ++++++++++++++++++++--------------------- 2 files changed, 57 insertions(+), 59 deletions(-) diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index c5d8b84320..d4586f3341 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -355,6 +355,7 @@ struct vring_packed_desc_event { struct guest_page { uint64_t guest_phys_addr; uint64_t host_iova; + uint64_t host_user_addr; uint64_t size; }; diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index f8ca03835f..850ac49169 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -143,57 +143,57 @@ get_blk_size(int fd) return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; } -static int -async_dma_map(struct rte_vhost_mem_region *region, bool do_map) +static void +async_dma_map(struct virtio_net *dev, bool do_map) { - uint64_t host_iova; int ret = 0; + uint32_t i; + struct guest_page *page; - host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr); if (do_map) { - /* Add mapped region into the default container of DPDK. */ - ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, - region->host_user_addr, - host_iova, - region->size); - if (ret) { - /* - * DMA device may bind with kernel driver, in this case, - * we don't need to program IOMMU manually. 
However, if no - * device is bound with vfio/uio in DPDK, and vfio kernel - * module is loaded, the API will still be called and return - * with ENODEV/ENOSUP. - * - * DPDK vfio only returns ENODEV/ENOSUP in very similar - * situations(vfio either unsupported, or supported - * but no devices found). Either way, no mappings could be - * performed. We treat it as normal case in async path. - */ - if (rte_errno == ENODEV || rte_errno == ENOTSUP) - return 0; - - VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n"); - /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */ - return 0; + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, + page->host_user_addr, + page->host_iova, + page->size); + if (ret) { + /* + * DMA device may bind with kernel driver, in this case, + * we don't need to program IOMMU manually. However, if no + * device is bound with vfio/uio in DPDK, and vfio kernel + * module is loaded, the API will still be called and return + * with ENODEV. + * + * DPDK vfio only returns ENODEV in very similar situations + * (vfio either unsupported, or supported but no devices found). + * Either way, no mappings could be performed. We treat it as + * normal case in async path. This is a workaround. + */ + if (rte_errno == ENODEV) + return; + + /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */ + VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n"); + } } } else { - /* Remove mapped region from the default container of DPDK. */ - ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, - region->host_user_addr, - host_iova, - region->size); - if (ret) { - /* like DMA map, ignore the kernel driver case when unmap. 
*/ - if (rte_errno == EINVAL) - return 0; - - VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n"); - return ret; + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, + page->host_user_addr, + page->host_iova, + page->size); + if (ret) { + /* like DMA map, ignore the kernel driver case when unmap. */ + if (rte_errno == EINVAL) + return; + + VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n"); + } } } - - return ret; } static void @@ -205,12 +205,12 @@ free_mem_region(struct virtio_net *dev) if (!dev || !dev->mem) return; + if (dev->async_copy && rte_vfio_is_enabled("vfio")) + async_dma_map(dev, false); + for (i = 0; i < dev->mem->nregions; i++) { reg = &dev->mem->regions[i]; if (reg->host_user_addr) { - if (dev->async_copy && rte_vfio_is_enabled("vfio")) - async_dma_map(reg, false); - munmap(reg->mmap_addr, reg->mmap_size); close(reg->fd); } @@ -978,7 +978,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev, static int add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, - uint64_t host_iova, uint64_t size) + uint64_t host_iova, uint64_t host_user_addr, uint64_t size) { struct guest_page *page, *last_page; struct guest_page *old_pages; @@ -999,8 +999,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, if (dev->nr_guest_pages > 0) { last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; /* merge if the two pages are continuous */ - if (host_iova == last_page->host_iova + - last_page->size) { + if (host_iova == last_page->host_iova + last_page->size && + guest_phys_addr == last_page->guest_phys_addr + last_page->size && + host_user_addr == last_page->host_user_addr + last_page->size) { last_page->size += size; return 0; } @@ -1009,6 +1010,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, page = &dev->guest_pages[dev->nr_guest_pages++]; page->guest_phys_addr = guest_phys_addr; page->host_iova = host_iova; + 
page->host_user_addr = host_user_addr; page->size = size; return 0; @@ -1028,7 +1030,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); - if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0) + if (add_one_guest_page(dev, guest_phys_addr, host_iova, + host_user_addr, size) < 0) return -1; host_user_addr += size; @@ -1040,7 +1043,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, host_iova = rte_mem_virt2iova((void *)(uintptr_t) host_user_addr); if (add_one_guest_page(dev, guest_phys_addr, host_iova, - size) < 0) + host_user_addr, size) < 0) return -1; host_user_addr += size; @@ -1215,7 +1218,6 @@ vhost_user_mmap_region(struct virtio_net *dev, uint64_t mmap_size; uint64_t alignment; int populate; - int ret; /* Check for memory_size + mmap_offset overflow */ if (mmap_offset >= -region->size) { @@ -1274,14 +1276,6 @@ vhost_user_mmap_region(struct virtio_net *dev, VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n"); return -1; } - - if (rte_vfio_is_enabled("vfio")) { - ret = async_dma_map(region, true); - if (ret) { - VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n"); - return -1; - } - } } VHOST_LOG_CONFIG(INFO, @@ -1420,6 +1414,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, dev->mem->nregions++; } + if (dev->async_copy && rte_vfio_is_enabled("vfio")) + async_dma_map(dev, true); + if (vhost_user_postcopy_register(dev, main_fd, msg) < 0) goto free_mem_table; -- 2.17.1