DPDK patches and discussions
* [dpdk-dev] [PATCH] vhost: fix physical address mapping
@ 2021-11-10  5:46 Xuan Ding
  2021-11-10  5:56 ` Xuan Ding
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Xuan Ding @ 2021-11-10  5:46 UTC
  To: maxime.coquelin, chenbo.xia; +Cc: dev, jiayu.hu, xingguang.he, Xuan Ding

When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
which requires page-by-page mapping for DMA devices. To be consistent,
this patch implements page-by-page mapping instead of mapping at
region granularity for both IOVA as VA and IOVA as PA modes.

Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---
 lib/vhost/vhost.h      |   1 +
 lib/vhost/vhost_user.c | 104 ++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 7085e0885c..d246538ca5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -355,6 +355,7 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
 	uint64_t host_phys_addr;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346c4d..560b7ab283 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -144,52 +144,54 @@ get_blk_size(int fd)
 }
 
 static int
-async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+async_dma_map(struct virtio_net *dev, bool do_map)
 {
-	uint64_t host_iova;
 	int ret = 0;
-
-	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	struct guest_page *page;
 	if (do_map) {
-		/* Add mapped region into the default container of DPDK. */
-		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						 region->host_user_addr,
-						 host_iova,
-						 region->size);
-		if (ret) {
-			/*
-			 * DMA device may bind with kernel driver, in this case,
-			 * we don't need to program IOMMU manually. However, if no
-			 * device is bound with vfio/uio in DPDK, and vfio kernel
-			 * module is loaded, the API will still be called and return
-			 * with ENODEV/ENOSUP.
-			 *
-			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
-			 * situations(vfio either unsupported, or supported
-			 * but no devices found). Either way, no mappings could be
-			 * performed. We treat it as normal case in async path.
-			 */
-			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+		for (uint32_t i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							 page->host_user_addr,
+							 page->host_phys_addr,
+							 page->size);
+			if (ret) {
+				/*
+				* DMA device may bind with kernel driver, in this case,
+				* we don't need to program IOMMU manually. However, if no
+				* device is bound with vfio/uio in DPDK, and vfio kernel
+				* module is loaded, the API will still be called and return
+				* with ENODEV/ENOSUP.
+				*
+				* DPDK vfio only returns ENODEV/ENOSUP in very similar
+				* situations(vfio either unsupported, or supported
+				* but no devices found). Either way, no mappings could be
+				* performed. We treat it as normal case in async path.
+				*/
+				if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+					return 0;
+
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+				/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
 				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
-			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
-			return 0;
+			}
 		}
 
 	} else {
-		/* Remove mapped region from the default container of DPDK. */
-		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						   region->host_user_addr,
-						   host_iova,
-						   region->size);
-		if (ret) {
-			/* like DMA map, ignore the kernel driver case when unmap. */
-			if (rte_errno == EINVAL)
-				return 0;
+		for (uint32_t i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							   page->host_user_addr,
+							   page->host_phys_addr,
+							   page->size);
+			if (ret) {
+				/* like DMA map, ignore the kernel driver case when unmap. */
+				if (rte_errno == EINVAL)
+					return 0;
 
-			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
-			return ret;
+				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+				return ret;
+			}
 		}
 	}
 
@@ -205,12 +207,12 @@ free_mem_region(struct virtio_net *dev)
 	if (!dev || !dev->mem)
 		return;
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, false);
+
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
-			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
-				async_dma_map(reg, false);
-
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -978,7 +980,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
 
 static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
-		   uint64_t host_phys_addr, uint64_t size)
+		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
 	struct guest_page *old_pages;
@@ -1009,6 +1011,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page = &dev->guest_pages[dev->nr_guest_pages++];
 	page->guest_phys_addr = guest_phys_addr;
 	page->host_phys_addr  = host_phys_addr;
+	page->host_user_addr = host_user_addr;
 	page->size = size;
 
 	return 0;
@@ -1028,7 +1031,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 	size = page_size - (guest_phys_addr & (page_size - 1));
 	size = RTE_MIN(size, reg_size);
 
-	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+			       host_user_addr, size) < 0)
 		return -1;
 
 	host_user_addr  += size;
@@ -1040,7 +1044,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
 						  host_user_addr);
 		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
-				size) < 0)
+				       host_user_addr, size) < 0)
 			return -1;
 
 		host_user_addr  += size;
@@ -1215,7 +1219,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
-	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1274,14 +1277,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
 			return -1;
 		}
-
-		if (rte_vfio_is_enabled("vfio")) {
-			ret = async_dma_map(region, true);
-			if (ret) {
-				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
-				return -1;
-			}
-		}
 	}
 
 	VHOST_LOG_CONFIG(INFO,
@@ -1420,6 +1415,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		dev->mem->nregions++;
 	}
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, true);
+
 	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
 		goto free_mem_table;
 
-- 
2.17.1



* [dpdk-dev] [PATCH] vhost: fix physical address mapping
  2021-11-10  5:46 [dpdk-dev] [PATCH] vhost: fix physical address mapping Xuan Ding
@ 2021-11-10  5:56 ` Xuan Ding
  2021-11-10  6:06 ` [dpdk-dev] [PATCH v3] " Xuan Ding
  2021-11-15 12:32 ` [PATCH v4] " Xuan Ding
  2 siblings, 0 replies; 10+ messages in thread
From: Xuan Ding @ 2021-11-10  5:56 UTC
  To: maxime.coquelin, chenbo.xia
  Cc: dev, jiayu.hu, yuanx.wang, xingguang.he, Xuan Ding

When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
which requires page-by-page mapping for DMA devices. To be consistent,
this patch implements page-by-page mapping instead of mapping at
region granularity for both IOVA as VA and IOVA as PA modes.

Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---

v2:
* Fix a format issue.
---
 lib/vhost/vhost.h      |   1 +
 lib/vhost/vhost_user.c | 104 ++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 7085e0885c..d246538ca5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -355,6 +355,7 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
 	uint64_t host_phys_addr;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346c4d..63e3ae83d0 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -144,52 +144,54 @@ get_blk_size(int fd)
 }
 
 static int
-async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+async_dma_map(struct virtio_net *dev, bool do_map)
 {
-	uint64_t host_iova;
 	int ret = 0;
-
-	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	struct guest_page *page;
 	if (do_map) {
-		/* Add mapped region into the default container of DPDK. */
-		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						 region->host_user_addr,
-						 host_iova,
-						 region->size);
-		if (ret) {
-			/*
-			 * DMA device may bind with kernel driver, in this case,
-			 * we don't need to program IOMMU manually. However, if no
-			 * device is bound with vfio/uio in DPDK, and vfio kernel
-			 * module is loaded, the API will still be called and return
-			 * with ENODEV/ENOSUP.
-			 *
-			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
-			 * situations(vfio either unsupported, or supported
-			 * but no devices found). Either way, no mappings could be
-			 * performed. We treat it as normal case in async path.
-			 */
-			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+		for (uint32_t i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							 page->host_user_addr,
+							 page->host_phys_addr,
+							 page->size);
+			if (ret) {
+				/*
+				 * DMA device may bind with kernel driver, in this case,
+				 * we don't need to program IOMMU manually. However, if no
+				 * device is bound with vfio/uio in DPDK, and vfio kernel
+				 * module is loaded, the API will still be called and return
+				 * with ENODEV/ENOSUP.
+				 *
+				 * DPDK vfio only returns ENODEV/ENOSUP in very similar
+				 * situations(vfio either unsupported, or supported
+				 * but no devices found). Either way, no mappings could be
+				 * performed. We treat it as normal case in async path.
+				 */
+				if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+					return 0;
+
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+				/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
 				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
-			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
-			return 0;
+			}
 		}
 
 	} else {
-		/* Remove mapped region from the default container of DPDK. */
-		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						   region->host_user_addr,
-						   host_iova,
-						   region->size);
-		if (ret) {
-			/* like DMA map, ignore the kernel driver case when unmap. */
-			if (rte_errno == EINVAL)
-				return 0;
+		for (uint32_t i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							   page->host_user_addr,
+							   page->host_phys_addr,
+							   page->size);
+			if (ret) {
+				/* like DMA map, ignore the kernel driver case when unmap. */
+				if (rte_errno == EINVAL)
+					return 0;
 
-			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
-			return ret;
+				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+				return ret;
+			}
 		}
 	}
 
@@ -205,12 +207,12 @@ free_mem_region(struct virtio_net *dev)
 	if (!dev || !dev->mem)
 		return;
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, false);
+
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
-			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
-				async_dma_map(reg, false);
-
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -978,7 +980,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
 
 static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
-		   uint64_t host_phys_addr, uint64_t size)
+		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
 	struct guest_page *old_pages;
@@ -1009,6 +1011,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page = &dev->guest_pages[dev->nr_guest_pages++];
 	page->guest_phys_addr = guest_phys_addr;
 	page->host_phys_addr  = host_phys_addr;
+	page->host_user_addr = host_user_addr;
 	page->size = size;
 
 	return 0;
@@ -1028,7 +1031,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 	size = page_size - (guest_phys_addr & (page_size - 1));
 	size = RTE_MIN(size, reg_size);
 
-	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+			       host_user_addr, size) < 0)
 		return -1;
 
 	host_user_addr  += size;
@@ -1040,7 +1044,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
 						  host_user_addr);
 		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
-				size) < 0)
+				       host_user_addr, size) < 0)
 			return -1;
 
 		host_user_addr  += size;
@@ -1215,7 +1219,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
-	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1274,14 +1277,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
 			return -1;
 		}
-
-		if (rte_vfio_is_enabled("vfio")) {
-			ret = async_dma_map(region, true);
-			if (ret) {
-				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
-				return -1;
-			}
-		}
 	}
 
 	VHOST_LOG_CONFIG(INFO,
@@ -1420,6 +1415,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		dev->mem->nregions++;
 	}
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, true);
+
 	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
 		goto free_mem_table;
 
-- 
2.17.1



* [dpdk-dev] [PATCH v3] vhost: fix physical address mapping
  2021-11-10  5:46 [dpdk-dev] [PATCH] vhost: fix physical address mapping Xuan Ding
  2021-11-10  5:56 ` Xuan Ding
@ 2021-11-10  6:06 ` Xuan Ding
  2021-11-15  7:20   ` Xia, Chenbo
  2021-11-15 12:32 ` [PATCH v4] " Xuan Ding
  2 siblings, 1 reply; 10+ messages in thread
From: Xuan Ding @ 2021-11-10  6:06 UTC
  To: maxime.coquelin, chenbo.xia
  Cc: dev, jiayu.hu, yuanx.wang, xingguang.he, Xuan Ding

When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
which requires page-by-page mapping for DMA devices. To be consistent,
this patch implements page-by-page mapping instead of mapping at
region granularity for both IOVA as VA and IOVA as PA modes.

Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---

v3:
* Fix commit title.

v2:
* Fix a format issue.
---
 lib/vhost/vhost.h      |   1 +
 lib/vhost/vhost_user.c | 105 ++++++++++++++++++++---------------------
 2 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 7085e0885c..d246538ca5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -355,6 +355,7 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
 	uint64_t host_phys_addr;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346c4d..37cdedda3c 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -144,52 +144,55 @@ get_blk_size(int fd)
 }
 
 static int
-async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+async_dma_map(struct virtio_net *dev, bool do_map)
 {
-	uint64_t host_iova;
 	int ret = 0;
-
-	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	uint32_t i;
+	struct guest_page *page;
 	if (do_map) {
-		/* Add mapped region into the default container of DPDK. */
-		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						 region->host_user_addr,
-						 host_iova,
-						 region->size);
-		if (ret) {
-			/*
-			 * DMA device may bind with kernel driver, in this case,
-			 * we don't need to program IOMMU manually. However, if no
-			 * device is bound with vfio/uio in DPDK, and vfio kernel
-			 * module is loaded, the API will still be called and return
-			 * with ENODEV/ENOSUP.
-			 *
-			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
-			 * situations(vfio either unsupported, or supported
-			 * but no devices found). Either way, no mappings could be
-			 * performed. We treat it as normal case in async path.
-			 */
-			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							 page->host_user_addr,
+							 page->host_phys_addr,
+							 page->size);
+			if (ret) {
+				/*
+				 * DMA device may bind with kernel driver, in this case,
+				 * we don't need to program IOMMU manually. However, if no
+				 * device is bound with vfio/uio in DPDK, and vfio kernel
+				 * module is loaded, the API will still be called and return
+				 * with ENODEV/ENOSUP.
+				 *
+				 * DPDK vfio only returns ENODEV/ENOSUP in very similar
+				 * situations(vfio either unsupported, or supported
+				 * but no devices found). Either way, no mappings could be
+				 * performed. We treat it as normal case in async path.
+				 */
+				if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+					return 0;
+
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+				/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
 				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
-			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
-			return 0;
+			}
 		}
 
 	} else {
-		/* Remove mapped region from the default container of DPDK. */
-		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						   region->host_user_addr,
-						   host_iova,
-						   region->size);
-		if (ret) {
-			/* like DMA map, ignore the kernel driver case when unmap. */
-			if (rte_errno == EINVAL)
-				return 0;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							   page->host_user_addr,
+							   page->host_phys_addr,
+							   page->size);
+			if (ret) {
+				/* like DMA map, ignore the kernel driver case when unmap. */
+				if (rte_errno == EINVAL)
+					return 0;
 
-			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
-			return ret;
+				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+				return ret;
+			}
 		}
 	}
 
@@ -205,12 +208,12 @@ free_mem_region(struct virtio_net *dev)
 	if (!dev || !dev->mem)
 		return;
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, false);
+
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
-			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
-				async_dma_map(reg, false);
-
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -978,7 +981,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
 
 static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
-		   uint64_t host_phys_addr, uint64_t size)
+		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
 	struct guest_page *old_pages;
@@ -1009,6 +1012,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page = &dev->guest_pages[dev->nr_guest_pages++];
 	page->guest_phys_addr = guest_phys_addr;
 	page->host_phys_addr  = host_phys_addr;
+	page->host_user_addr = host_user_addr;
 	page->size = size;
 
 	return 0;
@@ -1028,7 +1032,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 	size = page_size - (guest_phys_addr & (page_size - 1));
 	size = RTE_MIN(size, reg_size);
 
-	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+			       host_user_addr, size) < 0)
 		return -1;
 
 	host_user_addr  += size;
@@ -1040,7 +1045,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
 						  host_user_addr);
 		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
-				size) < 0)
+				       host_user_addr, size) < 0)
 			return -1;
 
 		host_user_addr  += size;
@@ -1215,7 +1220,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
-	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1274,14 +1278,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
 			return -1;
 		}
-
-		if (rte_vfio_is_enabled("vfio")) {
-			ret = async_dma_map(region, true);
-			if (ret) {
-				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
-				return -1;
-			}
-		}
 	}
 
 	VHOST_LOG_CONFIG(INFO,
@@ -1420,6 +1416,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		dev->mem->nregions++;
 	}
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, true);
+
 	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
 		goto free_mem_table;
 
-- 
2.17.1



* RE: [PATCH v3] vhost: fix physical address mapping
  2021-11-10  6:06 ` [dpdk-dev] [PATCH v3] " Xuan Ding
@ 2021-11-15  7:20   ` Xia, Chenbo
  2021-11-15  8:13     ` Ding, Xuan
  0 siblings, 1 reply; 10+ messages in thread
From: Xia, Chenbo @ 2021-11-15  7:20 UTC
  To: Ding, Xuan, maxime.coquelin; +Cc: dev, Hu, Jiayu, Wang, YuanX, He, Xingguang

Hi Xuan,

> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Wednesday, November 10, 2021 2:07 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>
> Subject: [PATCH v3] vhost: fix physical address mapping
> 
> When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
> which requires page-by-page mapping for DMA devices. To be consistent,
> this patch implements page-by-page mapping instead of mapping at
> region granularity for both IOVA as VA and IOVA as PA modes.
> 
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> 
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> ---
> 
> v3:
> * Fix commit title.
> 
> v2:
> * Fix a format issue.
> ---
>  lib/vhost/vhost.h      |   1 +
>  lib/vhost/vhost_user.c | 105 ++++++++++++++++++++---------------------
>  2 files changed, 53 insertions(+), 53 deletions(-)
> 
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 7085e0885c..d246538ca5 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
>  struct guest_page {
>  	uint64_t guest_phys_addr;
>  	uint64_t host_phys_addr;
> +	uint64_t host_user_addr;
>  	uint64_t size;
>  };
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index a781346c4d..37cdedda3c 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -144,52 +144,55 @@ get_blk_size(int fd)
>  }
> 
>  static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +async_dma_map(struct virtio_net *dev, bool do_map)
>  {
> -	uint64_t host_iova;
>  	int ret = 0;
> -
> -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> +	uint32_t i;
> +	struct guest_page *page;
>  	if (do_map) {
> -		/* Add mapped region into the default container of DPDK. */
> -		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						 region->host_user_addr,
> -						 host_iova,
> -						 region->size);
> -		if (ret) {
> -			/*
> -			 * DMA device may bind with kernel driver, in this case,
> -			 * we don't need to program IOMMU manually. However, if no
> -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> -			 * module is loaded, the API will still be called and return
> -			 * with ENODEV/ENOSUP.
> -			 *
> -			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
> -			 * situations(vfio either unsupported, or supported
> -			 * but no devices found). Either way, no mappings could be
> -			 * performed. We treat it as normal case in async path.
> -			 */
> -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							 page->host_user_addr,
> +							 page->host_phys_addr,
> +							 page->size);
> +			if (ret) {
> +				/*
> +				 * DMA device may bind with kernel driver, in this
> case,
> +				 * we don't need to program IOMMU manually. However,
> if no
> +				 * device is bound with vfio/uio in DPDK, and vfio
> kernel
> +				 * module is loaded, the API will still be called and
> return
> +				 * with ENODEV/ENOSUP.
> +				 *
> +				 * DPDK vfio only returns ENODEV/ENOSUP in very
> similar
> +				 * situations(vfio either unsupported, or supported
> +				 * but no devices found). Either way, no mappings
> could be
> +				 * performed. We treat it as normal case in async path.
> +				 */
> +				if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> +					return 0;

I don't think this logic is good enough to cover only the case of kernel driver +
unneeded mapping. It could also be vfio driver + incorrect mapping. It's not good to
assume ENODEV and ENOTSUP only come from DPDK; they could also come from the kernel.

> +
> +				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> +				/* DMA mapping errors won't stop
> VHST_USER_SET_MEM_TABLE. */
>  				return 0;

I understand this function covers many cases that are difficult to differentiate,
so you don't check the return value but use the log here to inform users.

I suggest using a WARNING log (since this can fail in the kernel driver case while
actually being correct) and printing the errno info for users.

Note: this is only a workaround, not a perfect solution. But since vhost with dmadev
is in progress, the vhost lib will most likely be aware of the dmadev id, so the
problem could be solved later (some dmadev API could be used to know the VA/PA mode
and kernel/user driver?)
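
For illustration, a minimal sketch of such logging (rte_strerror() and the
WARNING log level exist in DPDK; the exact wording below is hypothetical and
not the code that was merged):

	if (ret) {
		/* Keep going: the failure may be the benign kernel-driver
		 * case, so warn with errno info instead of erroring out. */
		VHOST_LOG_CONFIG(WARNING,
			"DMA engine map failed, errno %d (%s)\n",
			rte_errno, rte_strerror(rte_errno));
	}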

> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> -			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> -			return 0;
> +			}
>  		}
> 
>  	} else {
> -		/* Remove mapped region from the default container of DPDK. */
> -		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						   region->host_user_addr,
> -						   host_iova,
> -						   region->size);
> -		if (ret) {
> -			/* like DMA map, ignore the kernel driver case when unmap.
> */
> -			if (rte_errno == EINVAL)
> -				return 0;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							   page->host_user_addr,
> +							   page->host_phys_addr,
> +							   page->size);
> +			if (ret) {
> +				/* like DMA map, ignore the kernel driver case when
> unmap. */
> +				if (rte_errno == EINVAL)
> +					return 0;
> 
> -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> -			return ret;
> +				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> +				return ret;

Same here.

And since you don't check the return value, you can just not return anything, and
the return type can be void

Thanks,
Chenbo


> +			}
>  		}
>  	}
> 
> @@ -205,12 +208,12 @@ free_mem_region(struct virtio_net *dev)
>  	if (!dev || !dev->mem)
>  		return;
> 
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, false);
> +
>  	for (i = 0; i < dev->mem->nregions; i++) {
>  		reg = &dev->mem->regions[i];
>  		if (reg->host_user_addr) {
> -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> -				async_dma_map(reg, false);
> -
>  			munmap(reg->mmap_addr, reg->mmap_size);
>  			close(reg->fd);
>  		}
> @@ -978,7 +981,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
> 
>  static int
>  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> -		   uint64_t host_phys_addr, uint64_t size)
> +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
>  {
>  	struct guest_page *page, *last_page;
>  	struct guest_page *old_pages;
> @@ -1009,6 +1012,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t
> guest_phys_addr,
>  	page = &dev->guest_pages[dev->nr_guest_pages++];
>  	page->guest_phys_addr = guest_phys_addr;
>  	page->host_phys_addr  = host_phys_addr;
> +	page->host_user_addr = host_user_addr;
>  	page->size = size;
> 
>  	return 0;
> @@ -1028,7 +1032,8 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
>  	size = page_size - (guest_phys_addr & (page_size - 1));
>  	size = RTE_MIN(size, reg_size);
> 
> -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
> +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> +			       host_user_addr, size) < 0)
>  		return -1;
> 
>  	host_user_addr  += size;
> @@ -1040,7 +1045,7 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
>  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
>  						  host_user_addr);
>  		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> -				size) < 0)
> +				       host_user_addr, size) < 0)
>  			return -1;
> 
>  		host_user_addr  += size;
> @@ -1215,7 +1220,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	uint64_t mmap_size;
>  	uint64_t alignment;
>  	int populate;
> -	int ret;
> 
>  	/* Check for memory_size + mmap_offset overflow */
>  	if (mmap_offset >= -region->size) {
> @@ -1274,14 +1278,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  			VHOST_LOG_CONFIG(ERR, "adding guest pages to region
> failed.\n");
>  			return -1;
>  		}
> -
> -		if (rte_vfio_is_enabled("vfio")) {
> -			ret = async_dma_map(region, true);
> -			if (ret) {
> -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine
> failed\n");
> -				return -1;
> -			}
> -		}
>  	}
> 
>  	VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1416,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>  		dev->mem->nregions++;
>  	}
> 
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, true);
> +
>  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>  		goto free_mem_table;
> 
> --
> 2.17.1



* RE: [PATCH v3] vhost: fix physical address mapping
  2021-11-15  7:20   ` Xia, Chenbo
@ 2021-11-15  8:13     ` Ding, Xuan
  2021-11-15 12:11       ` Xia, Chenbo
  0 siblings, 1 reply; 10+ messages in thread
From: Ding, Xuan @ 2021-11-15  8:13 UTC
  To: Xia, Chenbo, maxime.coquelin
  Cc: dev, Hu, Jiayu, Wang, YuanX, He, Xingguang, Burakov, Anatoly

Hi Chenbo,

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, November 15, 2021 3:21 PM
> To: Ding, Xuan <xuan.ding@intel.com>; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>
> Subject: RE: [PATCH v3] vhost: fix physical address mapping
> 
> Hi Xuan,
> 
> > -----Original Message-----
> > From: Ding, Xuan <xuan.ding@intel.com>
> > Sent: Wednesday, November 10, 2021 2:07 PM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> > <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; Ding,
> > Xuan <xuan.ding@intel.com>
> > Subject: [PATCH v3] vhost: fix physical address mapping
> >
> > When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
> > which requires page-by-page mapping for DMA devices. To be consistent,
> > this patch implements page-by-page mapping instead of mapping at
> > region granularity for both IOVA as VA and IOVA as PA modes.
> >
> > Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> >
> > Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > ---
> >
> > v3:
> > * Fix commit title.
> >
> > v2:
> > * Fix a format issue.
> > ---
> >  lib/vhost/vhost.h      |   1 +
> >  lib/vhost/vhost_user.c | 105
> > ++++++++++++++++++++---------------------
> >  2 files changed, 53 insertions(+), 53 deletions(-)
> >
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> > 7085e0885c..d246538ca5 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -355,6 +355,7 @@ struct vring_packed_desc_event {  struct
> > guest_page {
> >  	uint64_t guest_phys_addr;
> >  	uint64_t host_phys_addr;
> > +	uint64_t host_user_addr;
> >  	uint64_t size;
> >  };
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > a781346c4d..37cdedda3c 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -144,52 +144,55 @@ get_blk_size(int fd)  }
> >
> >  static int
> > -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> > +async_dma_map(struct virtio_net *dev, bool do_map)
> >  {
> > -	uint64_t host_iova;
> >  	int ret = 0;
> > -
> > -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> >host_user_addr);
> > +	uint32_t i;
> > +	struct guest_page *page;
> >  	if (do_map) {
> > -		/* Add mapped region into the default container of DPDK. */
> > -		ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -						 region->host_user_addr,
> > -						 host_iova,
> > -						 region->size);
> > -		if (ret) {
> > -			/*
> > -			 * DMA device may bind with kernel driver, in this case,
> > -			 * we don't need to program IOMMU manually.
> However, if no
> > -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> > -			 * module is loaded, the API will still be called and
> return
> > -			 * with ENODEV/ENOSUP.
> > -			 *
> > -			 * DPDK vfio only returns ENODEV/ENOSUP in very
> similar
> > -			 * situations(vfio either unsupported, or supported
> > -			 * but no devices found). Either way, no mappings could
> be
> > -			 * performed. We treat it as normal case in async path.
> > -			 */
> > -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > +			page = &dev->guest_pages[i];
> > +			ret =
> > rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +							 page-
> >host_user_addr,
> > +							 page-
> >host_phys_addr,
> > +							 page->size);
> > +			if (ret) {
> > +				/*
> > +				 * DMA device may bind with kernel driver, in
> this
> > case,
> > +				 * we don't need to program IOMMU manually.
> However,
> > if no
> > +				 * device is bound with vfio/uio in DPDK, and
> vfio
> > kernel
> > +				 * module is loaded, the API will still be called
> and
> > return
> > +				 * with ENODEV/ENOSUP.
> > +				 *
> > +				 * DPDK vfio only returns ENODEV/ENOSUP in
> very
> > similar
> > +				 * situations(vfio either unsupported, or
> supported
> > +				 * but no devices found). Either way, no
> mappings
> > could be
> > +				 * performed. We treat it as normal case in
> async path.
> > +				 */
> > +				if (rte_errno == ENODEV || rte_errno ==
> ENOTSUP)
> > +					return 0;
> 
> I don't think this logic is good enough to cover only the case of kernel driver +
> unneeded mapping. It could also be vfio driver + incorrect mapping. It's not good
> to assume ENODEV and ENOTSUP only come from DPDK; they could also come
> from the kernel.

The code here is a workaround to be compatible with the kernel driver case, since
we can't know the device driver in vhost for now. In fact, the appropriate approach
for deciding whether to perform mapping here should be based on the device driver.

> 
> > +
> > +				VHOST_LOG_CONFIG(ERR, "DMA engine map
> failed\n");
> > +				/* DMA mapping errors won't stop
> > VHST_USER_SET_MEM_TABLE. */
> >  				return 0;
> 
> I understand this function covers many cases that are difficult to differentiate,
> so you don't check the return value but use the log here to inform users.
> 
> I suggest using a WARNING log (since this can fail in the kernel driver case
> while actually being correct) and printing the errno info for users.

Thanks for your suggestion. Accordingly, I will update the doc to inform users
which action to take for each warning message.

> 
> Note: this is only a workaround, not a perfect solution. But since vhost with
> dmadev is in progress, the vhost lib will most likely be aware of the dmadev id,
> so the problem could be solved later (some dmadev API could be used to know
> the VA/PA mode and kernel/user driver?)
 
With rte_iova_mode(), we can know the current IOVA mode in DPDK. It will not affect
vfio mapping, because binding a device to vfio always requires mapping. For dmadev,
it is possible to know the current driver; since the integration of async vhost with
dmadev is in progress, let's use this solution for now.
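
As a hypothetical sketch (rte_iova_mode() is an existing EAL API declared in
rte_eal.h; the flag below is illustrative only, not part of this patch):

	/* In IOVA as PA mode the host physical pages backing a region need
	 * not be contiguous, so DMA mapping must be done page by page; in
	 * IOVA as VA mode one mapping per region would suffice. */
	bool map_page_by_page = (rte_iova_mode() == RTE_IOVA_PA);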

> 
> > -
> > -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > -			/* DMA mapping errors won't stop
> VHST_USER_SET_MEM_TABLE. */
> > -			return 0;
> > +			}
> >  		}
> >
> >  	} else {
> > -		/* Remove mapped region from the default container of DPDK.
> */
> > -		ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -						   region->host_user_addr,
> > -						   host_iova,
> > -						   region->size);
> > -		if (ret) {
> > -			/* like DMA map, ignore the kernel driver case when
> unmap.
> > */
> > -			if (rte_errno == EINVAL)
> > -				return 0;
> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > +			page = &dev->guest_pages[i];
> > +			ret =
> > rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +							   page-
> >host_user_addr,
> > +							   page-
> >host_phys_addr,
> > +							   page->size);
> > +			if (ret) {
> > +				/* like DMA map, ignore the kernel driver case
> when
> > unmap. */
> > +				if (rte_errno == EINVAL)
> > +					return 0;
> >
> > -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> failed\n");
> > -			return ret;
> > +				VHOST_LOG_CONFIG(ERR, "DMA engine
> unmap failed\n");
> > +				return ret;
> 
> Same here.
> 
> And since you don't check the return value, you can just not return anything,
> and the return type can be void

Good catch, pls see next version.

Thanks,
Xuan

> 
> Thanks,
> Chenbo
> 
> 
> > +			}
> >  		}
> >  	}
> >
> > @@ -205,12 +208,12 @@ free_mem_region(struct virtio_net *dev)
> >  	if (!dev || !dev->mem)
> >  		return;
> >
> > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +		async_dma_map(dev, false);
> > +
> >  	for (i = 0; i < dev->mem->nregions; i++) {
> >  		reg = &dev->mem->regions[i];
> >  		if (reg->host_user_addr) {
> > -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > -				async_dma_map(reg, false);
> > -
> >  			munmap(reg->mmap_addr, reg->mmap_size);
> >  			close(reg->fd);
> >  		}
> > @@ -978,7 +981,7 @@ vhost_user_set_vring_base(struct virtio_net
> > **pdev,
> >
> >  static int
> >  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> > -		   uint64_t host_phys_addr, uint64_t size)
> > +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t
> size)
> >  {
> >  	struct guest_page *page, *last_page;
> >  	struct guest_page *old_pages;
> > @@ -1009,6 +1012,7 @@ add_one_guest_page(struct virtio_net *dev,
> > uint64_t guest_phys_addr,
> >  	page = &dev->guest_pages[dev->nr_guest_pages++];
> >  	page->guest_phys_addr = guest_phys_addr;
> >  	page->host_phys_addr  = host_phys_addr;
> > +	page->host_user_addr = host_user_addr;
> >  	page->size = size;
> >
> >  	return 0;
> > @@ -1028,7 +1032,8 @@ add_guest_pages(struct virtio_net *dev, struct
> > rte_vhost_mem_region *reg,
> >  	size = page_size - (guest_phys_addr & (page_size - 1));
> >  	size = RTE_MIN(size, reg_size);
> >
> > -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) <
> 0)
> > +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > +			       host_user_addr, size) < 0)
> >  		return -1;
> >
> >  	host_user_addr  += size;
> > @@ -1040,7 +1045,7 @@ add_guest_pages(struct virtio_net *dev, struct
> > rte_vhost_mem_region *reg,
> >  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
> >  						  host_user_addr);
> >  		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > -				size) < 0)
> > +				       host_user_addr, size) < 0)
> >  			return -1;
> >
> >  		host_user_addr  += size;
> > @@ -1215,7 +1220,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >  	uint64_t mmap_size;
> >  	uint64_t alignment;
> >  	int populate;
> > -	int ret;
> >
> >  	/* Check for memory_size + mmap_offset overflow */
> >  	if (mmap_offset >= -region->size) {
> > @@ -1274,14 +1278,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >  			VHOST_LOG_CONFIG(ERR, "adding guest pages to
> region failed.\n");
> >  			return -1;
> >  		}
> > -
> > -		if (rte_vfio_is_enabled("vfio")) {
> > -			ret = async_dma_map(region, true);
> > -			if (ret) {
> > -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU
> for DMA engine
> > failed\n");
> > -				return -1;
> > -			}
> > -		}
> >  	}
> >
> >  	VHOST_LOG_CONFIG(INFO,
> > @@ -1420,6 +1416,9 @@ vhost_user_set_mem_table(struct virtio_net
> > **pdev, struct VhostUserMsg *msg,
> >  		dev->mem->nregions++;
> >  	}
> >
> > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +		async_dma_map(dev, true);
> > +
> >  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
> >  		goto free_mem_table;
> >
> > --
> > 2.17.1



* RE: [PATCH v3] vhost: fix physical address mapping
  2021-11-15  8:13     ` Ding, Xuan
@ 2021-11-15 12:11       ` Xia, Chenbo
  0 siblings, 0 replies; 10+ messages in thread
From: Xia, Chenbo @ 2021-11-15 12:11 UTC
  To: Ding, Xuan, maxime.coquelin
  Cc: dev, Hu, Jiayu, Wang, YuanX, He, Xingguang, Burakov, Anatoly

> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Monday, November 15, 2021 4:13 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; Burakov,
> Anatoly <anatoly.burakov@intel.com>
> Subject: RE: [PATCH v3] vhost: fix physical address mapping
> 
> Hi Chenbo,
> 
> > -----Original Message-----
> > From: Xia, Chenbo <chenbo.xia@intel.com>
> > Sent: Monday, November 15, 2021 3:21 PM
> > To: Ding, Xuan <xuan.ding@intel.com>; maxime.coquelin@redhat.com
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> > <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>
> > Subject: RE: [PATCH v3] vhost: fix physical address mapping
> >
> > Hi Xuan,
> >
> > > -----Original Message-----
> > > From: Ding, Xuan <xuan.ding@intel.com>
> > > Sent: Wednesday, November 10, 2021 2:07 PM
> > > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> > > <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; Ding,
> > > Xuan <xuan.ding@intel.com>
> > > Subject: [PATCH v3] vhost: fix physical address mapping
> > >
> > > When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
> > > which requires page-by-page mapping for DMA devices. To be consistent,
> > > this patch implements page-by-page mapping instead of mapping at
> > > region granularity for both IOVA as VA and IOVA as PA modes.
> > >
> > > Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> > >
> > > Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > > ---
> > >
> > > v3:
> > > * Fix commit title.
> > >
> > > v2:
> > > * Fix a format issue.
> > > ---
> > >  lib/vhost/vhost.h      |   1 +
> > >  lib/vhost/vhost_user.c | 105
> > > ++++++++++++++++++++---------------------
> > >  2 files changed, 53 insertions(+), 53 deletions(-)
> > >
> > > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> > > 7085e0885c..d246538ca5 100644
> > > --- a/lib/vhost/vhost.h
> > > +++ b/lib/vhost/vhost.h
> > > @@ -355,6 +355,7 @@ struct vring_packed_desc_event {  struct
> > > guest_page {
> > >  	uint64_t guest_phys_addr;
> > >  	uint64_t host_phys_addr;
> > > +	uint64_t host_user_addr;
> > >  	uint64_t size;
> > >  };
> > >
> > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > > a781346c4d..37cdedda3c 100644
> > > --- a/lib/vhost/vhost_user.c
> > > +++ b/lib/vhost/vhost_user.c
> > > @@ -144,52 +144,55 @@ get_blk_size(int fd)  }
> > >
> > >  static int
> > > -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> > > +async_dma_map(struct virtio_net *dev, bool do_map)
> > >  {
> > > -	uint64_t host_iova;
> > >  	int ret = 0;
> > > -
> > > -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> > >host_user_addr);
> > > +	uint32_t i;
> > > +	struct guest_page *page;
> > >  	if (do_map) {
> > > -		/* Add mapped region into the default container of DPDK. */
> > > -		ret =
> > rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > > -						 region->host_user_addr,
> > > -						 host_iova,
> > > -						 region->size);
> > > -		if (ret) {
> > > -			/*
> > > -			 * DMA device may bind with kernel driver, in this case,
> > > -			 * we don't need to program IOMMU manually.
> > However, if no
> > > -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> > > -			 * module is loaded, the API will still be called and
> > return
> > > -			 * with ENODEV/ENOSUP.
> > > -			 *
> > > -			 * DPDK vfio only returns ENODEV/ENOSUP in very
> > similar
> > > -			 * situations(vfio either unsupported, or supported
> > > -			 * but no devices found). Either way, no mappings could
> > be
> > > -			 * performed. We treat it as normal case in async path.
> > > -			 */
> > > -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> > > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > > +			page = &dev->guest_pages[i];
> > > +			ret =
> > > rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > > +							 page-
> > >host_user_addr,
> > > +							 page-
> > >host_phys_addr,
> > > +							 page->size);
> > > +			if (ret) {
> > > +				/*
> > > +				 * DMA device may bind with kernel driver, in
> > this
> > > case,
> > > +				 * we don't need to program IOMMU manually.
> > However,
> > > if no
> > > +				 * device is bound with vfio/uio in DPDK, and
> > vfio
> > > kernel
> > > +				 * module is loaded, the API will still be called
> > and
> > > return
> > > +				 * with ENODEV/ENOSUP.
> > > +				 *
> > > +				 * DPDK vfio only returns ENODEV/ENOSUP in
> > very
> > > similar
> > > +				 * situations(vfio either unsupported, or
> > supported
> > > +				 * but no devices found). Either way, no
> > mappings
> > > could be
> > > +				 * performed. We treat it as normal case in
> > async path.
> > > +				 */
> > > +				if (rte_errno == ENODEV || rte_errno ==
> > ENOTSUP)
> > > +					return 0;
> >
> > I don't think this logic is good enough to cover only the case of kernel
> > driver + unneeded mapping. It could also be vfio driver + incorrect mapping.
> > It's not good to assume ENODEV and ENOTSUP only come from DPDK; they
> > could also come from the kernel.
> 
> The code here is a workaround to be compatible with the kernel driver case,
> since we can't know the device driver in vhost for now. In fact, the appropriate
> approach for deciding whether to perform mapping here should be based on
> the device driver.

OK. But since this code does not consider the case of FreeBSD, checking ENODEV 
is enough.

> 
> >
> > > +
> > > +				VHOST_LOG_CONFIG(ERR, "DMA engine map
> > failed\n");
> > > +				/* DMA mapping errors won't stop
> > > VHST_USER_SET_MEM_TABLE. */

VHST -> VHOST

> > >  				return 0;
> >
> > I understand this function covers many cases that are difficult to
> > differentiate, so you don't check the return value but use the log here to
> > inform users.
> >
> > I suggest using a WARNING log (since this can fail in the kernel driver case
> > while actually being correct) and printing the errno info for users.
> 
> Thanks for your suggestion. Accordingly, I will update the doc to inform users
> which action to take for each warning message.

If ENODEV is used to filter out the failing-but-correct case (using the kernel
driver), you can just use an ERR log here. But the return type should be void.

Thanks,
Chenbo

> 
> >
> > Note: this is only a workaround, not a perfect solution. But since vhost with
> > dmadev is in progress, the vhost lib will most likely be aware of the dmadev
> > id, so the problem could be solved later (some dmadev API could be used to
> > know the VA/PA mode and kernel/user driver?)
> 
> With rte_iova_mode(), we can know the current IOVA mode in DPDK. It will not
> affect vfio mapping, because binding a device to vfio always requires mapping.
> For dmadev, it is possible to know the current driver; since the integration of
> async vhost with dmadev is in progress, let's use this solution for now.
> 
> >
> > > -
> > > -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > > -			/* DMA mapping errors won't stop
> > VHST_USER_SET_MEM_TABLE. */
> > > -			return 0;
> > > +			}
> > >  		}
> > >
> > >  	} else {
> > > -		/* Remove mapped region from the default container of DPDK.
> > */
> > > -		ret =
> > rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > > -						   region->host_user_addr,
> > > -						   host_iova,
> > > -						   region->size);
> > > -		if (ret) {
> > > -			/* like DMA map, ignore the kernel driver case when
> > unmap.
> > > */
> > > -			if (rte_errno == EINVAL)
> > > -				return 0;
> > > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > > +			page = &dev->guest_pages[i];
> > > +			ret =
> > > rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > > +							   page-
> > >host_user_addr,
> > > +							   page-
> > >host_phys_addr,
> > > +							   page->size);
> > > +			if (ret) {
> > > +				/* like DMA map, ignore the kernel driver case
> > when
> > > unmap. */
> > > +				if (rte_errno == EINVAL)
> > > +					return 0;
> > >
> > > -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> > failed\n");
> > > -			return ret;
> > > +				VHOST_LOG_CONFIG(ERR, "DMA engine
> > unmap failed\n");
> > > +				return ret;
> >
> > Same here.
> >
> > And since you don't check the return value, you can just not return anything,
> > and the return type can be void
> 
> Good catch, pls see next version.
> 
> Thanks,
> Xuan
> 
> >
> > Thanks,
> > Chenbo
> >
> >
> > > +			}
> > >  		}
> > >  	}
> > >
> > > @@ -205,12 +208,12 @@ free_mem_region(struct virtio_net *dev)
> > >  	if (!dev || !dev->mem)
> > >  		return;
> > >
> > > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > > +		async_dma_map(dev, false);
> > > +
> > >  	for (i = 0; i < dev->mem->nregions; i++) {
> > >  		reg = &dev->mem->regions[i];
> > >  		if (reg->host_user_addr) {
> > > -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > > -				async_dma_map(reg, false);
> > > -
> > >  			munmap(reg->mmap_addr, reg->mmap_size);
> > >  			close(reg->fd);
> > >  		}
> > > @@ -978,7 +981,7 @@ vhost_user_set_vring_base(struct virtio_net
> > > **pdev,
> > >
> > >  static int
> > >  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> > > -		   uint64_t host_phys_addr, uint64_t size)
> > > +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t
> > size)
> > >  {
> > >  	struct guest_page *page, *last_page;
> > >  	struct guest_page *old_pages;
> > > @@ -1009,6 +1012,7 @@ add_one_guest_page(struct virtio_net *dev,
> > > uint64_t guest_phys_addr,
> > >  	page = &dev->guest_pages[dev->nr_guest_pages++];
> > >  	page->guest_phys_addr = guest_phys_addr;
> > >  	page->host_phys_addr  = host_phys_addr;
> > > +	page->host_user_addr = host_user_addr;
> > >  	page->size = size;
> > >
> > >  	return 0;
> > > @@ -1028,7 +1032,8 @@ add_guest_pages(struct virtio_net *dev, struct
> > > rte_vhost_mem_region *reg,
> > >  	size = page_size - (guest_phys_addr & (page_size - 1));
> > >  	size = RTE_MIN(size, reg_size);
> > >
> > > -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) <
> > 0)
> > > +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > > +			       host_user_addr, size) < 0)
> > >  		return -1;
> > >
> > >  	host_user_addr  += size;
> > > @@ -1040,7 +1045,7 @@ add_guest_pages(struct virtio_net *dev, struct
> > > rte_vhost_mem_region *reg,
> > >  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
> > >  						  host_user_addr);
> > >  		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > > -				size) < 0)
> > > +				       host_user_addr, size) < 0)
> > >  			return -1;
> > >
> > >  		host_user_addr  += size;
> > > @@ -1215,7 +1220,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> > >  	uint64_t mmap_size;
> > >  	uint64_t alignment;
> > >  	int populate;
> > > -	int ret;
> > >
> > >  	/* Check for memory_size + mmap_offset overflow */
> > >  	if (mmap_offset >= -region->size) {
> > > @@ -1274,14 +1278,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> > >  			VHOST_LOG_CONFIG(ERR, "adding guest pages to
> > region failed.\n");
> > >  			return -1;
> > >  		}
> > > -
> > > -		if (rte_vfio_is_enabled("vfio")) {
> > > -			ret = async_dma_map(region, true);
> > > -			if (ret) {
> > > -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU
> > for DMA engine
> > > failed\n");
> > > -				return -1;
> > > -			}
> > > -		}
> > >  	}
> > >
> > >  	VHOST_LOG_CONFIG(INFO,
> > > @@ -1420,6 +1416,9 @@ vhost_user_set_mem_table(struct virtio_net
> > > **pdev, struct VhostUserMsg *msg,
> > >  		dev->mem->nregions++;
> > >  	}
> > >
> > > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > > +		async_dma_map(dev, true);
> > > +
> > >  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
> > >  		goto free_mem_table;
> > >
> > > --
> > > 2.17.1



* [PATCH v4] vhost: fix physical address mapping
  2021-11-10  5:46 [dpdk-dev] [PATCH] vhost: fix physical address mapping Xuan Ding
  2021-11-10  5:56 ` Xuan Ding
  2021-11-10  6:06 ` [dpdk-dev] [PATCH v3] " Xuan Ding
@ 2021-11-15 12:32 ` Xuan Ding
  2021-11-16  7:47   ` Xia, Chenbo
  2 siblings, 1 reply; 10+ messages in thread
From: Xuan Ding @ 2021-11-15 12:32 UTC
  To: maxime.coquelin, chenbo.xia
  Cc: dev, jiayu.hu, yuanx.wang, xingguang.he, Xuan Ding

When IOVA as PA mode is chosen, the IOVAs are likely to be non-contiguous,
which requires page-by-page mapping for DMA devices. To be consistent,
this patch implements page-by-page mapping instead of mapping at
region granularity for both IOVA as VA and IOVA as PA modes.

Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---
v4:
* Remove unnecessary ENOTSUP check.
* Adjust return type.

v3:
* Fix commit title.

v2:
* Fix a format issue.
---
 lib/vhost/vhost.h      |   1 +
 lib/vhost/vhost_user.c | 111 ++++++++++++++++++++---------------------
 2 files changed, 54 insertions(+), 58 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 7085e0885c..d246538ca5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -355,6 +355,7 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
 	uint64_t host_phys_addr;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346c4d..0a9dc3350f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -143,57 +143,56 @@ get_blk_size(int fd)
 	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
-static int
-async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+static void
+async_dma_map(struct virtio_net *dev, bool do_map)
 {
-	uint64_t host_iova;
 	int ret = 0;
-
-	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	uint32_t i;
+	struct guest_page *page;
 	if (do_map) {
-		/* Add mapped region into the default container of DPDK. */
-		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						 region->host_user_addr,
-						 host_iova,
-						 region->size);
-		if (ret) {
-			/*
-			 * DMA device may bind with kernel driver, in this case,
-			 * we don't need to program IOMMU manually. However, if no
-			 * device is bound with vfio/uio in DPDK, and vfio kernel
-			 * module is loaded, the API will still be called and return
-			 * with ENODEV/ENOSUP.
-			 *
-			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
-			 * situations(vfio either unsupported, or supported
-			 * but no devices found). Either way, no mappings could be
-			 * performed. We treat it as normal case in async path.
-			 */
-			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
-				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
-			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
-			return 0;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							 page->host_user_addr,
+							 page->host_phys_addr,
+							 page->size);
+			if (ret) {
+				/*
+				 * DMA device may bind with kernel driver, in this case,
+				 * we don't need to program IOMMU manually. However, if no
+				 * device is bound with vfio/uio in DPDK, and vfio kernel
+				 * module is loaded, the API will still be called and return
+				 * with ENODEV.
+				 *
+				 * DPDK vfio only returns ENODEV in very similar situations
+				 * (vfio either unsupported, or supported but no devices found).
+				 * Either way, no mappings could be performed. We treat it as
+				 * normal case in async path. This is a workaround.
+				 */
+				if (rte_errno == ENODEV)
+					return;
+
+				/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+			}
 		}
 
 	} else {
-		/* Remove mapped region from the default container of DPDK. */
-		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						   region->host_user_addr,
-						   host_iova,
-						   region->size);
-		if (ret) {
-			/* like DMA map, ignore the kernel driver case when unmap. */
-			if (rte_errno == EINVAL)
-				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
-			return ret;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							   page->host_user_addr,
+							   page->host_phys_addr,
+							   page->size);
+			if (ret) {
+				/* like DMA map, ignore the kernel driver case when unmap. */
+				if (rte_errno == EINVAL)
+					return;
+
+				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+			}
 		}
 	}
-
-	return ret;
 }
 
 static void
@@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
 	if (!dev || !dev->mem)
 		return;
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, false);
+
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
-			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
-				async_dma_map(reg, false);
-
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
 
 static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
-		   uint64_t host_phys_addr, uint64_t size)
+		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
 	struct guest_page *old_pages;
@@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page = &dev->guest_pages[dev->nr_guest_pages++];
 	page->guest_phys_addr = guest_phys_addr;
 	page->host_phys_addr  = host_phys_addr;
+	page->host_user_addr = host_user_addr;
 	page->size = size;
 
 	return 0;
@@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 	size = page_size - (guest_phys_addr & (page_size - 1));
 	size = RTE_MIN(size, reg_size);
 
-	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+			       host_user_addr, size) < 0)
 		return -1;
 
 	host_user_addr  += size;
@@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
 						  host_user_addr);
 		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
-				size) < 0)
+				       host_user_addr, size) < 0)
 			return -1;
 
 		host_user_addr  += size;
@@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
-	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
 			return -1;
 		}
-
-		if (rte_vfio_is_enabled("vfio")) {
-			ret = async_dma_map(region, true);
-			if (ret) {
-				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
-				return -1;
-			}
-		}
 	}
 
 	VHOST_LOG_CONFIG(INFO,
@@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		dev->mem->nregions++;
 	}
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, true);
+
 	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
 		goto free_mem_table;
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v4] vhost: fix physical address mapping
  2021-11-15 12:32 ` [PATCH v4] " Xuan Ding
@ 2021-11-16  7:47   ` Xia, Chenbo
  2021-11-16  8:24     ` Ding, Xuan
  0 siblings, 1 reply; 10+ messages in thread
From: Xia, Chenbo @ 2021-11-16  7:47 UTC (permalink / raw)
  To: Ding, Xuan, maxime.coquelin; +Cc: dev, Hu, Jiayu, Wang, YuanX, He, Xingguang

> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Monday, November 15, 2021 8:32 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; Ding, Xuan
> <xuan.ding@intel.com>
> Subject: [PATCH v4] vhost: fix physical address mapping
> 
> When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> which requires page by page mapping for DMA devices. To be consistent,
> this patch implements page by page mapping instead of mapping at the
> region granularity for both IOVA as VA and PA mode.
> 
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> 
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> ---
> v4:
> * Remove unnecessary ENOTSUP check.
> * Adjust return type.
> 
> v3:
> * Fix commit title.
> 
> v2:
> * Fix a format issue.
> ---
>  lib/vhost/vhost.h      |   1 +
>  lib/vhost/vhost_user.c | 111 ++++++++++++++++++++---------------------
>  2 files changed, 54 insertions(+), 58 deletions(-)
> 
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 7085e0885c..d246538ca5 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
>  struct guest_page {
>  	uint64_t guest_phys_addr;
>  	uint64_t host_phys_addr;

This name confused me during review: it should be host_iova.

Could you help rename it in the patch?
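
Something like this is what I have in mind -- just an untested sketch,
with the new field from your patch included:

	struct guest_page {
		uint64_t guest_phys_addr;
		/* IOVA of the host page: a PA or a VA, depending on
		 * the DPDK IOVA mode, hence the name host_iova. */
		uint64_t host_iova;
		uint64_t host_user_addr;
		uint64_t size;
	};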

> +	uint64_t host_user_addr;
>  	uint64_t size;
>  };
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index a781346c4d..0a9dc3350f 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -143,57 +143,56 @@ get_blk_size(int fd)
>  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
>  }
> 
> -static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +static void
> +async_dma_map(struct virtio_net *dev, bool do_map)
>  {
> -	uint64_t host_iova;
>  	int ret = 0;
> -
> -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> +	uint32_t i;
> +	struct guest_page *page;
>  	if (do_map) {
> -		/* Add mapped region into the default container of DPDK. */
> -		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						 region->host_user_addr,
> -						 host_iova,
> -						 region->size);
> -		if (ret) {
> -			/*
> -			 * DMA device may bind with kernel driver, in this case,
> -			 * we don't need to program IOMMU manually. However, if no
> -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> -			 * module is loaded, the API will still be called and return
> -			 * with ENODEV/ENOSUP.
> -			 *
> -			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
> -			 * situations(vfio either unsupported, or supported
> -			 * but no devices found). Either way, no mappings could be
> -			 * performed. We treat it as normal case in async path.
> -			 */
> -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> -			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> -			return 0;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							 page->host_user_addr,
> +							 page->host_phys_addr,
> +							 page->size);
> +			if (ret) {
> +				/*
> +				 * DMA device may bind with kernel driver, in this
> case,
> +				 * we don't need to program IOMMU manually. However,
> if no
> +				 * device is bound with vfio/uio in DPDK, and vfio
> kernel
> +				 * module is loaded, the API will still be called and
> return
> +				 * with ENODEV.
> +				 *
> +				 * DPDK vfio only returns ENODEV in very similar
> situations
> +				 * (vfio either unsupported, or supported but no
> devices found).
> +				 * Either way, no mappings could be performed. We
> treat it as
> +				 * normal case in async path. This is a workaround.
> +				 */
> +				if (rte_errno == ENODEV)
> +					return;
> +
> +				/* DMA mapping errors won't stop
> VHST_USER_SET_MEM_TABLE. */

A comment on v3 was missed:

VHST -> VHOST

Thanks,
Chenbo

> +				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> +			}
>  		}
> 
>  	} else {
> -		/* Remove mapped region from the default container of DPDK. */
> -		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						   region->host_user_addr,
> -						   host_iova,
> -						   region->size);
> -		if (ret) {
> -			/* like DMA map, ignore the kernel driver case when unmap.
> */
> -			if (rte_errno == EINVAL)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> -			return ret;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							   page->host_user_addr,
> +							   page->host_phys_addr,
> +							   page->size);
> +			if (ret) {
> +				/* like DMA map, ignore the kernel driver case when
> unmap. */
> +				if (rte_errno == EINVAL)
> +					return;
> +
> +				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> +			}
>  		}
>  	}
> -
> -	return ret;
>  }
> 
>  static void
> @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
>  	if (!dev || !dev->mem)
>  		return;
> 
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, false);
> +
>  	for (i = 0; i < dev->mem->nregions; i++) {
>  		reg = &dev->mem->regions[i];
>  		if (reg->host_user_addr) {
> -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> -				async_dma_map(reg, false);
> -
>  			munmap(reg->mmap_addr, reg->mmap_size);
>  			close(reg->fd);
>  		}
> @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
> 
>  static int
>  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> -		   uint64_t host_phys_addr, uint64_t size)
> +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
>  {
>  	struct guest_page *page, *last_page;
>  	struct guest_page *old_pages;
> @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t
> guest_phys_addr,
>  	page = &dev->guest_pages[dev->nr_guest_pages++];
>  	page->guest_phys_addr = guest_phys_addr;
>  	page->host_phys_addr  = host_phys_addr;
> +	page->host_user_addr = host_user_addr;
>  	page->size = size;
> 
>  	return 0;
> @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
>  	size = page_size - (guest_phys_addr & (page_size - 1));
>  	size = RTE_MIN(size, reg_size);
> 
> -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
> +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> +			       host_user_addr, size) < 0)
>  		return -1;
> 
>  	host_user_addr  += size;
> @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
>  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
>  						  host_user_addr);
>  		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> -				size) < 0)
> +				       host_user_addr, size) < 0)
>  			return -1;
> 
>  		host_user_addr  += size;
> @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	uint64_t mmap_size;
>  	uint64_t alignment;
>  	int populate;
> -	int ret;
> 
>  	/* Check for memory_size + mmap_offset overflow */
>  	if (mmap_offset >= -region->size) {
> @@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  			VHOST_LOG_CONFIG(ERR, "adding guest pages to region
> failed.\n");
>  			return -1;
>  		}
> -
> -		if (rte_vfio_is_enabled("vfio")) {
> -			ret = async_dma_map(region, true);
> -			if (ret) {
> -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine
> failed\n");
> -				return -1;
> -			}
> -		}
>  	}
> 
>  	VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>  		dev->mem->nregions++;
>  	}
> 
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, true);
> +
>  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>  		goto free_mem_table;
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v4] vhost: fix physical address mapping
  2021-11-16  7:47   ` Xia, Chenbo
@ 2021-11-16  8:24     ` Ding, Xuan
  2021-11-17 14:39       ` Ding, Xuan
  0 siblings, 1 reply; 10+ messages in thread
From: Ding, Xuan @ 2021-11-16  8:24 UTC (permalink / raw)
  To: Xia, Chenbo, maxime.coquelin; +Cc: dev, Hu, Jiayu, Wang, YuanX, He, Xingguang

Hi Chenbo,

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, November 16, 2021 3:48 PM
> To: Ding, Xuan <xuan.ding@intel.com>; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>
> Subject: RE: [PATCH v4] vhost: fix physical address mapping
> 
> > -----Original Message-----
> > From: Ding, Xuan <xuan.ding@intel.com>
> > Sent: Monday, November 15, 2021 8:32 PM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
> > <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>; Ding,
> > Xuan <xuan.ding@intel.com>
> > Subject: [PATCH v4] vhost: fix physical address mapping
> >
> > When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> > which requires page by page mapping for DMA devices. To be consistent,
> > this patch implements page by page mapping instead of mapping at the
> > region granularity for both IOVA as VA and PA mode.
> >
> > Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> >
> > Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > ---
> > v4:
> > * Remove unnecessary ENOTSUP check.
> > * Adjust return type.
> >
> > v3:
> > * Fix commit title.
> >
> > v2:
> > * Fix a format issue.
> > ---
> >  lib/vhost/vhost.h      |   1 +
> >  lib/vhost/vhost_user.c | 111
> > ++++++++++++++++++++---------------------
> >  2 files changed, 54 insertions(+), 58 deletions(-)
> >
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> > 7085e0885c..d246538ca5 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -355,6 +355,7 @@ struct vring_packed_desc_event {  struct
> > guest_page {
> >  	uint64_t guest_phys_addr;
> >  	uint64_t host_phys_addr;
> 
> This name confused me during review: it should be host_iova.
> 
> Could you help rename it in the patch?
 
Yes, host_phys_addr is an IOVA; its exact meaning depends on the DPDK IOVA mode.
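
To illustrate what I mean (a minimal sketch only, not part of the patch;
it needs rte_memory.h and rte_eal.h), the stored value comes from
rte_mem_virt2iova(), and its meaning follows rte_eal_iova_mode():

	uint64_t iova = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);

	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
		/* IOVA as VA: iova equals host_user_addr, so a whole
		 * region stays contiguous and one mapping would do. */
	} else {
		/* IOVA as PA: iova is the physical address of the page
		 * and may jump between neighboring pages, which is why
		 * the patch maps page by page. */
	}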

> 
> > +	uint64_t host_user_addr;
> >  	uint64_t size;
> >  };
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > a781346c4d..0a9dc3350f 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -143,57 +143,56 @@ get_blk_size(int fd)
> >  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
> >
> > -static int
> > -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> > +static void
> > +async_dma_map(struct virtio_net *dev, bool do_map)
> >  {
> > -	uint64_t host_iova;
> >  	int ret = 0;
> > -
> > -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> >host_user_addr);
> > +	uint32_t i;
> > +	struct guest_page *page;
> >  	if (do_map) {
> > -		/* Add mapped region into the default container of DPDK. */
> > -		ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -						 region->host_user_addr,
> > -						 host_iova,
> > -						 region->size);
> > -		if (ret) {
> > -			/*
> > -			 * DMA device may bind with kernel driver, in this case,
> > -			 * we don't need to program IOMMU manually.
> However, if no
> > -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> > -			 * module is loaded, the API will still be called and
> return
> > -			 * with ENODEV/ENOSUP.
> > -			 *
> > -			 * DPDK vfio only returns ENODEV/ENOSUP in very
> similar
> > -			 * situations(vfio either unsupported, or supported
> > -			 * but no devices found). Either way, no mappings could
> be
> > -			 * performed. We treat it as normal case in async path.
> > -			 */
> > -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> > -				return 0;
> > -
> > -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > -			/* DMA mapping errors won't stop
> VHST_USER_SET_MEM_TABLE. */
> > -			return 0;
> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > +			page = &dev->guest_pages[i];
> > +			ret =
> > rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +							 page-
> >host_user_addr,
> > +							 page-
> >host_phys_addr,
> > +							 page->size);
> > +			if (ret) {
> > +				/*
> > +				 * DMA device may bind with kernel driver, in
> this
> > case,
> > +				 * we don't need to program IOMMU manually.
> However,
> > if no
> > +				 * device is bound with vfio/uio in DPDK, and
> vfio
> > kernel
> > +				 * module is loaded, the API will still be called
> and
> > return
> > +				 * with ENODEV.
> > +				 *
> > +				 * DPDK vfio only returns ENODEV in very
> similar
> > situations
> > +				 * (vfio either unsupported, or supported but no
> > devices found).
> > +				 * Either way, no mappings could be performed.
> We
> > treat it as
> > +				 * normal case in async path. This is a
> workaround.
> > +				 */
> > +				if (rte_errno == ENODEV)
> > +					return;
> > +
> > +				/* DMA mapping errors won't stop
> > VHST_USER_SET_MEM_TABLE. */
> 
> A comment on v3 was missed:
 
Thanks for the catch. Will fix it in v5.

Thanks,
Xuan

> 
> VHST -> VHOST
> 
> Thanks,
> Chenbo
> 
> > +				VHOST_LOG_CONFIG(ERR, "DMA engine map
> failed\n");
> > +			}
> >  		}
> >
> >  	} else {
> > -		/* Remove mapped region from the default container of DPDK.
> */
> > -		ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -						   region->host_user_addr,
> > -						   host_iova,
> > -						   region->size);
> > -		if (ret) {
> > -			/* like DMA map, ignore the kernel driver case when
> unmap.
> > */
> > -			if (rte_errno == EINVAL)
> > -				return 0;
> > -
> > -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> failed\n");
> > -			return ret;
> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > +			page = &dev->guest_pages[i];
> > +			ret =
> > rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +							   page-
> >host_user_addr,
> > +							   page-
> >host_phys_addr,
> > +							   page->size);
> > +			if (ret) {
> > +				/* like DMA map, ignore the kernel driver case
> when
> > unmap. */
> > +				if (rte_errno == EINVAL)
> > +					return;
> > +
> > +				VHOST_LOG_CONFIG(ERR, "DMA engine
> unmap failed\n");
> > +			}
> >  		}
> >  	}
> > -
> > -	return ret;
> >  }
> >
> >  static void
> > @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
> >  	if (!dev || !dev->mem)
> >  		return;
> >
> > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +		async_dma_map(dev, false);
> > +
> >  	for (i = 0; i < dev->mem->nregions; i++) {
> >  		reg = &dev->mem->regions[i];
> >  		if (reg->host_user_addr) {
> > -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > -				async_dma_map(reg, false);
> > -
> >  			munmap(reg->mmap_addr, reg->mmap_size);
> >  			close(reg->fd);
> >  		}
> > @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net
> > **pdev,
> >
> >  static int
> >  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> > -		   uint64_t host_phys_addr, uint64_t size)
> > +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t
> size)
> >  {
> >  	struct guest_page *page, *last_page;
> >  	struct guest_page *old_pages;
> > @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev,
> > uint64_t guest_phys_addr,
> >  	page = &dev->guest_pages[dev->nr_guest_pages++];
> >  	page->guest_phys_addr = guest_phys_addr;
> >  	page->host_phys_addr  = host_phys_addr;
> > +	page->host_user_addr = host_user_addr;
> >  	page->size = size;
> >
> >  	return 0;
> > @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct
> > rte_vhost_mem_region *reg,
> >  	size = page_size - (guest_phys_addr & (page_size - 1));
> >  	size = RTE_MIN(size, reg_size);
> >
> > -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) <
> 0)
> > +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > +			       host_user_addr, size) < 0)
> >  		return -1;
> >
> >  	host_user_addr  += size;
> > @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct
> > rte_vhost_mem_region *reg,
> >  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
> >  						  host_user_addr);
> >  		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > -				size) < 0)
> > +				       host_user_addr, size) < 0)
> >  			return -1;
> >
> >  		host_user_addr  += size;
> > @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >  	uint64_t mmap_size;
> >  	uint64_t alignment;
> >  	int populate;
> > -	int ret;
> >
> >  	/* Check for memory_size + mmap_offset overflow */
> >  	if (mmap_offset >= -region->size) {
> > @@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >  			VHOST_LOG_CONFIG(ERR, "adding guest pages to
> region failed.\n");
> >  			return -1;
> >  		}
> > -
> > -		if (rte_vfio_is_enabled("vfio")) {
> > -			ret = async_dma_map(region, true);
> > -			if (ret) {
> > -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU
> for DMA engine
> > failed\n");
> > -				return -1;
> > -			}
> > -		}
> >  	}
> >
> >  	VHOST_LOG_CONFIG(INFO,
> > @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net
> > **pdev, struct VhostUserMsg *msg,
> >  		dev->mem->nregions++;
> >  	}
> >
> > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +		async_dma_map(dev, true);
> > +
> >  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
> >  		goto free_mem_table;
> >
> > --
> > 2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v4] vhost: fix physical address mapping
  2021-11-16  8:24     ` Ding, Xuan
@ 2021-11-17 14:39       ` Ding, Xuan
  0 siblings, 0 replies; 10+ messages in thread
From: Ding, Xuan @ 2021-11-17 14:39 UTC (permalink / raw)
  To: Ding, Xuan, Xia, Chenbo, maxime.coquelin
  Cc: dev, Hu, Jiayu, Wang, YuanX, He, Xingguang

Hi Maxime,

>-----Original Message-----
>From: Ding, Xuan <xuan.ding@intel.com>
>Sent: Tuesday, November 16, 2021 4:24 PM
>To: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com
>Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
><yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>
>Subject: RE: [PATCH v4] vhost: fix physical address mapping
>
>Hi Chenbo,
>
>> -----Original Message-----
>> From: Xia, Chenbo <chenbo.xia@intel.com>
>> Sent: Tuesday, November 16, 2021 3:48 PM
>> To: Ding, Xuan <xuan.ding@intel.com>; maxime.coquelin@redhat.com
>> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
>> <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>
>> Subject: RE: [PATCH v4] vhost: fix physical address mapping
>>
>> > -----Original Message-----
>> > From: Ding, Xuan <xuan.ding@intel.com>
>> > Sent: Monday, November 15, 2021 8:32 PM
>> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
>> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Wang, YuanX
>> > <yuanx.wang@intel.com>; He, Xingguang <xingguang.he@intel.com>;
>> > Ding, Xuan <xuan.ding@intel.com>
>> > Subject: [PATCH v4] vhost: fix physical address mapping
>> >
>> > When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
>> > which requires page by page mapping for DMA devices. To be
>> > consistent, this patch implements page by page mapping instead of
>> > mapping at the region granularity for both IOVA as VA and PA mode.
>> >
>> > Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
>> >
>> > Signed-off-by: Xuan Ding <xuan.ding@intel.com>
>> > ---
>> > v4:
>> > * Remove unnecessary ENOTSUP check.
>> > * Adjust return type.
>> >
>> > v3:
>> > * Fix commit title.
>> >
>> > v2:
>> > * Fix a format issue.
>> > ---
>> >  lib/vhost/vhost.h      |   1 +
>> >  lib/vhost/vhost_user.c | 111
>> > ++++++++++++++++++++---------------------
>> >  2 files changed, 54 insertions(+), 58 deletions(-)
>> >
>> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
>> > 7085e0885c..d246538ca5 100644
>> > --- a/lib/vhost/vhost.h
>> > +++ b/lib/vhost/vhost.h
>> > @@ -355,6 +355,7 @@ struct vring_packed_desc_event {  struct
>> > guest_page {
>> >  	uint64_t guest_phys_addr;
>> >  	uint64_t host_phys_addr;
>>
>> This name confused me during review: it should be host_iova.
>>
>> Could you help rename it in the patch?
>
>Yes, host_phys_addr is an IOVA; its exact meaning depends on the DPDK IOVA mode.
>
>>
>> > +	uint64_t host_user_addr;
>> >  	uint64_t size;
>> >  };
>> >
>> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
>> > a781346c4d..0a9dc3350f 100644
>> > --- a/lib/vhost/vhost_user.c
>> > +++ b/lib/vhost/vhost_user.c
>> > @@ -143,57 +143,56 @@ get_blk_size(int fd)
>> >  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
>> >
>> > -static int
>> > -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
>> > +static void
>> > +async_dma_map(struct virtio_net *dev, bool do_map)
>> >  {
>> > -	uint64_t host_iova;
>> >  	int ret = 0;
>> > -
>> > -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
>> >host_user_addr);
>> > +	uint32_t i;
>> > +	struct guest_page *page;
>> >  	if (do_map) {
>> > -		/* Add mapped region into the default container of DPDK. */
>> > -		ret =
>> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> > -						 region->host_user_addr,
>> > -						 host_iova,
>> > -						 region->size);
>> > -		if (ret) {
>> > -			/*
>> > -			 * DMA device may bind with kernel driver, in this case,
>> > -			 * we don't need to program IOMMU manually.
>> However, if no
>> > -			 * device is bound with vfio/uio in DPDK, and vfio
>kernel
>> > -			 * module is loaded, the API will still be called and
>> return
>> > -			 * with ENODEV/ENOSUP.
>> > -			 *
>> > -			 * DPDK vfio only returns ENODEV/ENOSUP in very
>> similar
>> > -			 * situations(vfio either unsupported, or supported
>> > -			 * but no devices found). Either way, no mappings
>could
>> be
>> > -			 * performed. We treat it as normal case in async path.
>> > -			 */
>> > -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
>> > -				return 0;
>> > -
>> > -			VHOST_LOG_CONFIG(ERR, "DMA engine map
>failed\n");
>> > -			/* DMA mapping errors won't stop
>> VHST_USER_SET_MEM_TABLE. */
>> > -			return 0;
>> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
>> > +			page = &dev->guest_pages[i];
>> > +			ret =
>> > rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> > +							 page-
>> >host_user_addr,
>> > +							 page-
>> >host_phys_addr,
>> > +							 page->size);
>> > +			if (ret) {
>> > +				/*
>> > +				 * DMA device may bind with kernel driver, in
>> this
>> > case,
>> > +				 * we don't need to program IOMMU manually.
>> However,
>> > if no
>> > +				 * device is bound with vfio/uio in DPDK, and
>> vfio
>> > kernel
>> > +				 * module is loaded, the API will still be called
>> and
>> > return
>> > +				 * with ENODEV.
>> > +				 *
>> > +				 * DPDK vfio only returns ENODEV in very
>> similar
>> > situations
>> > +				 * (vfio either unsupported, or supported but
>no
>> > devices found).
>> > +				 * Either way, no mappings could be
>performed.
>> We
>> > treat it as
>> > +				 * normal case in async path. This is a
>> workaround.
>> > +				 */
>> > +				if (rte_errno == ENODEV)
>> > +					return;
>> > +
>> > +				/* DMA mapping errors won't stop
>> > VHST_USER_SET_MEM_TABLE. */
>>
>> A comment on v3 was missed:
>
>Thanks for the catch. Will fix it in v5.
>
>Thanks,
>Xuan

After an off-list discussion with Yuan, this patch has a dependency on Yuan's fix patch.
It should also take HVA continuity into consideration (rough sketch below). Since the
v21.11 merge window is almost closed, we plan to merge the two patches and send them
in the next release. Does that make sense to you?
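
Roughly, taking HVA continuity into consideration means only coalescing
a new page into the previous guest_page entry when GPA, IOVA and HVA are
all contiguous. A hypothetical helper just to show the idea -- the name
and shape are invented here, not the final code (assumes stdbool.h):

	static bool
	guest_page_can_merge(const struct guest_page *last,
			     uint64_t guest_phys_addr, uint64_t host_phys_addr,
			     uint64_t host_user_addr)
	{
		/* All three address spaces must continue exactly where
		 * the previous page ended. */
		return last->guest_phys_addr + last->size == guest_phys_addr &&
		       last->host_phys_addr + last->size == host_phys_addr &&
		       last->host_user_addr + last->size == host_user_addr;
	}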

Sorry for missing some test cases under PA mode, which require some changes to the current design.

Thanks,
Xuan

>
>>
>> VHST -> VHOST
>>
>> Thanks,
>> Chenbo
>>
>> > +				VHOST_LOG_CONFIG(ERR, "DMA engine map
>> failed\n");
>> > +			}
>> >  		}
>> >
>> >  	} else {
>> > -		/* Remove mapped region from the default container of DPDK.
>> */
>> > -		ret =
>> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> > -						   region->host_user_addr,
>> > -						   host_iova,
>> > -						   region->size);
>> > -		if (ret) {
>> > -			/* like DMA map, ignore the kernel driver case when
>> unmap.
>> > */
>> > -			if (rte_errno == EINVAL)
>> > -				return 0;
>> > -
>> > -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
>> failed\n");
>> > -			return ret;
>> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
>> > +			page = &dev->guest_pages[i];
>> > +			ret =
>> > rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> > +							   page-
>> >host_user_addr,
>> > +							   page-
>> >host_phys_addr,
>> > +							   page->size);
>> > +			if (ret) {
>> > +				/* like DMA map, ignore the kernel driver case
>> when
>> > unmap. */
>> > +				if (rte_errno == EINVAL)
>> > +					return;
>> > +
>> > +				VHOST_LOG_CONFIG(ERR, "DMA engine
>> unmap failed\n");
>> > +			}
>> >  		}
>> >  	}
>> > -
>> > -	return ret;
>> >  }
>> >
>> >  static void
>> > @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
>> >  	if (!dev || !dev->mem)
>> >  		return;
>> >
>> > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
>> > +		async_dma_map(dev, false);
>> > +
>> >  	for (i = 0; i < dev->mem->nregions; i++) {
>> >  		reg = &dev->mem->regions[i];
>> >  		if (reg->host_user_addr) {
>> > -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
>> > -				async_dma_map(reg, false);
>> > -
>> >  			munmap(reg->mmap_addr, reg->mmap_size);
>> >  			close(reg->fd);
>> >  		}
>> > @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net
>> > **pdev,
>> >
>> >  static int
>> >  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>> > -		   uint64_t host_phys_addr, uint64_t size)
>> > +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t
>> size)
>> >  {
>> >  	struct guest_page *page, *last_page;
>> >  	struct guest_page *old_pages;
>> > @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev,
>> > uint64_t guest_phys_addr,
>> >  	page = &dev->guest_pages[dev->nr_guest_pages++];
>> >  	page->guest_phys_addr = guest_phys_addr;
>> >  	page->host_phys_addr  = host_phys_addr;
>> > +	page->host_user_addr = host_user_addr;
>> >  	page->size = size;
>> >
>> >  	return 0;
>> > @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct
>> > rte_vhost_mem_region *reg,
>> >  	size = page_size - (guest_phys_addr & (page_size - 1));
>> >  	size = RTE_MIN(size, reg_size);
>> >
>> > -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size)
><
>> 0)
>> > +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
>> > +			       host_user_addr, size) < 0)
>> >  		return -1;
>> >
>> >  	host_user_addr  += size;
>> > @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct
>> > rte_vhost_mem_region *reg,
>> >  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
>> >  						  host_user_addr);
>> >  		if (add_one_guest_page(dev, guest_phys_addr,
>host_phys_addr,
>> > -				size) < 0)
>> > +				       host_user_addr, size) < 0)
>> >  			return -1;
>> >
>> >  		host_user_addr  += size;
>> > @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>> >  	uint64_t mmap_size;
>> >  	uint64_t alignment;
>> >  	int populate;
>> > -	int ret;
>> >
>> >  	/* Check for memory_size + mmap_offset overflow */
>> >  	if (mmap_offset >= -region->size) { @@ -1274,14 +1274,6 @@
>> > vhost_user_mmap_region(struct virtio_net *dev,
>> >  			VHOST_LOG_CONFIG(ERR, "adding guest pages to
>> region failed.\n");
>> >  			return -1;
>> >  		}
>> > -
>> > -		if (rte_vfio_is_enabled("vfio")) {
>> > -			ret = async_dma_map(region, true);
>> > -			if (ret) {
>> > -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU
>> for DMA engine
>> > failed\n");
>> > -				return -1;
>> > -			}
>> > -		}
>> >  	}
>> >
>> >  	VHOST_LOG_CONFIG(INFO,
>> > @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net
>> > **pdev, struct VhostUserMsg *msg,
>> >  		dev->mem->nregions++;
>> >  	}
>> >
>> > +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
>> > +		async_dma_map(dev, true);
>> > +
>> >  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>> >  		goto free_mem_table;
>> >
>> > --
>> > 2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-11-17 14:39 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-10  5:46 [dpdk-dev] [PATCH] vhost: fix physical address mapping Xuan Ding
2021-11-10  5:56 ` Xuan Ding
2021-11-10  6:06 ` [dpdk-dev] [PATCH v3] " Xuan Ding
2021-11-15  7:20   ` Xia, Chenbo
2021-11-15  8:13     ` Ding, Xuan
2021-11-15 12:11       ` Xia, Chenbo
2021-11-15 12:32 ` [PATCH v4] " Xuan Ding
2021-11-16  7:47   ` Xia, Chenbo
2021-11-16  8:24     ` Ding, Xuan
2021-11-17 14:39       ` Ding, Xuan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).