DPDK patches and discussions
 help / color / mirror / Atom feed
From: xuan.ding@intel.com
To: maxime.coquelin@redhat.com, chenbo.xia@intel.com
Cc: dev@dpdk.org, ktraynor@redhat.com, ferruh.yigit@intel.com,
	jiayu.hu@intel.com, yuanx.wang@intel.com,
	Xuan Ding <xuan.ding@intel.com>,
	stable@dpdk.org
Subject: [PATCH v4 2/2] vhost: fix physical address mapping
Date: Wed, 16 Feb 2022 02:28:54 +0000
Message-ID: <20220216022854.39057-3-xuan.ding@intel.com> (raw)
In-Reply-To: <20220216022854.39057-1-xuan.ding@intel.com>

From: Xuan Ding <xuan.ding@intel.com>

When IOVA as PA mode is selected, IOVAs are likely to be non-contiguous,
so DMA devices require page-by-page mapping. For consistency, this patch
implements page-by-page mapping, instead of mapping at region
granularity, for both IOVA as VA and IOVA as PA modes.

Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
Cc: stable@dpdk.org

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vhost.h      |   1 +
 lib/vhost/vhost_user.c | 119 ++++++++++++++++++++---------------------
 2 files changed, 58 insertions(+), 62 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index ccac679c25..21e1866a52 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -427,6 +427,7 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
 	uint64_t host_iova;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 066adfb464..9d31d8840e 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -142,57 +142,57 @@ get_blk_size(int fd)
 	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
-static int
-async_dma_map(struct virtio_net *dev, struct rte_vhost_mem_region *region, bool do_map)
+static void
+async_dma_map(struct virtio_net *dev, bool do_map)
 {
-	uint64_t host_iova;
 	int ret = 0;
+	uint32_t i;
+	struct guest_page *page;
 
-	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
 	if (do_map) {
-		/* Add mapped region into the default container of DPDK. */
-		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						 region->host_user_addr,
-						 host_iova,
-						 region->size);
-		if (ret) {
-			/*
-			 * DMA device may bind with kernel driver, in this case,
-			 * we don't need to program IOMMU manually. However, if no
-			 * device is bound with vfio/uio in DPDK, and vfio kernel
-			 * module is loaded, the API will still be called and return
-			 * with ENODEV/ENOSUP.
-			 *
-			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
-			 * situations(vfio either unsupported, or supported
-			 * but no devices found). Either way, no mappings could be
-			 * performed. We treat it as normal case in async path.
-			 */
-			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
-				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "(%s) DMA engine map failed\n", dev->ifname);
-			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
-			return 0;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							 page->host_user_addr,
+							 page->host_iova,
+							 page->size);
+			if (ret) {
+				/*
+				 * DMA device may bind with kernel driver, in this case,
+				 * we don't need to program IOMMU manually. However, if no
+				 * device is bound with vfio/uio in DPDK, and vfio kernel
+				 * module is loaded, the API will still be called and return
+				 * with ENODEV.
+				 *
+				 * DPDK vfio only returns ENODEV in very similar situations
+				 * (vfio either unsupported, or supported but no devices found).
+				 * Either way, no mappings could be performed. We treat it as
+				 * normal case in async path. This is a workaround.
+				 */
+				if (rte_errno == ENODEV)
+					return;
+
+				/* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+			}
 		}
 
 	} else {
-		/* Remove mapped region from the default container of DPDK. */
-		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
-						   region->host_user_addr,
-						   host_iova,
-						   region->size);
-		if (ret) {
-			/* like DMA map, ignore the kernel driver case when unmap. */
-			if (rte_errno == EINVAL)
-				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "(%s) DMA engine unmap failed\n", dev->ifname);
-			return ret;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+							   page->host_user_addr,
+							   page->host_iova,
+							   page->size);
+			if (ret) {
+				/* like DMA map, ignore the kernel driver case when unmap. */
+				if (rte_errno == EINVAL)
+					return;
+
+				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+			}
 		}
 	}
-
-	return ret;
 }
 
 static void
@@ -204,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
 	if (!dev || !dev->mem)
 		return;
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, false);
+
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
-			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
-				async_dma_map(dev, reg, false);
-
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -988,7 +988,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
 
 static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
-		   uint64_t host_iova, uint64_t size)
+		   uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
 	struct guest_page *old_pages;
@@ -1000,7 +1000,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 					dev->max_guest_pages * sizeof(*page),
 					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
-			VHOST_LOG_CONFIG(ERR, "(%s) cannot realloc guest_pages\n", dev->ifname);
+			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
 			rte_free(old_pages);
 			return -1;
 		}
@@ -1009,8 +1009,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages > 0) {
 		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
 		/* merge if the two pages are continuous */
-		if (host_iova == last_page->host_iova +
-				      last_page->size) {
+		if (host_iova == last_page->host_iova + last_page->size &&
+		    guest_phys_addr == last_page->guest_phys_addr + last_page->size &&
+		    host_user_addr == last_page->host_user_addr + last_page->size) {
 			last_page->size += size;
 			return 0;
 		}
@@ -1019,6 +1020,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page = &dev->guest_pages[dev->nr_guest_pages++];
 	page->guest_phys_addr = guest_phys_addr;
 	page->host_iova  = host_iova;
+	page->host_user_addr = host_user_addr;
 	page->size = size;
 
 	return 0;
@@ -1038,7 +1040,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 	size = page_size - (guest_phys_addr & (page_size - 1));
 	size = RTE_MIN(size, reg_size);
 
-	if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0)
+	if (add_one_guest_page(dev, guest_phys_addr, host_iova,
+			       host_user_addr, size) < 0)
 		return -1;
 
 	host_user_addr  += size;
@@ -1050,7 +1053,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		host_iova = rte_mem_virt2iova((void *)(uintptr_t)
 						  host_user_addr);
 		if (add_one_guest_page(dev, guest_phys_addr, host_iova,
-				size) < 0)
+				       host_user_addr, size) < 0)
 			return -1;
 
 		host_user_addr  += size;
@@ -1226,7 +1229,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
-	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1283,16 +1285,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 					dev->ifname);
 			return -1;
 		}
-
-		if (rte_vfio_is_enabled("vfio")) {
-			ret = async_dma_map(dev, region, true);
-			if (ret) {
-				VHOST_LOG_CONFIG(ERR,
-					"(%s) configure IOMMU for DMA engine failed\n",
-					dev->ifname);
-				return -1;
-			}
-		}
 	}
 
 	VHOST_LOG_CONFIG(INFO, "(%s) guest memory region size: 0x%" PRIx64 "\n",
@@ -1429,6 +1421,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
 		dev->mem->nregions++;
 	}
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, true);
+
 	if (vhost_user_postcopy_register(dev, main_fd, ctx) < 0)
 		goto free_mem_table;
 
-- 
2.17.1


  parent reply	other threads:[~2022-02-16  2:29 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-14 16:11 [PATCH 0/2] " xuan.ding
2022-01-14 16:11 ` [PATCH 1/2] " xuan.ding
2022-01-14 16:12 ` [PATCH 2/2] vhost: rename field in guest page struct xuan.ding
2022-02-16  2:28 ` [PATCH v4 0/2] vhost: fix async address mapping xuan.ding
2022-02-16  2:28   ` [PATCH v4 1/2] vhost: fix field naming in guest page struct xuan.ding
2022-02-16  2:28   ` xuan.ding [this message]
2022-02-17  8:54   ` [PATCH v4 0/2] vhost: fix async address mapping Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220216022854.39057-3-xuan.ding@intel.com \
    --to=xuan.ding@intel.com \
    --cc=chenbo.xia@intel.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=jiayu.hu@intel.com \
    --cc=ktraynor@redhat.com \
    --cc=maxime.coquelin@redhat.com \
    --cc=stable@dpdk.org \
    --cc=yuanx.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git