From: Xuan Ding <xuan.ding@intel.com>
To: dev@dpdk.org, anatoly.burakov@intel.com,
maxime.coquelin@redhat.com, chenbo.xia@intel.com
Cc: jiayu.hu@intel.com, cheng1.jiang@intel.com,
bruce.richardson@intel.com, sunil.pai.g@intel.com,
yinan.wang@intel.com, yvonnex.yang@intel.com,
Xuan Ding <xuan.ding@intel.com>
Subject: [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost
Date: Sat, 25 Sep 2021 10:03:58 +0000 [thread overview]
Message-ID: <20210925100358.61995-3-xuan.ding@intel.com> (raw)
In-Reply-To: <20210925100358.61995-1-xuan.ding@intel.com>
The use of IOMMU has many advantages, such as isolation and address
translation. This patch extends the capability of the DMA engine to use
IOMMU if the DMA engine is bound to vfio.
When the memory table is set, the guest memory will be mapped
into the default container of DPDK.
Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---
lib/vhost/vhost.h | 4 ++
lib/vhost/vhost_user.c | 112 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 89a31e4ca8..bc5695e899 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -370,6 +370,10 @@ struct virtio_net {
int16_t broadcast_rarp;
uint32_t nr_vring;
int async_copy;
+
+ /* Record the dma map status for each region. */
+ bool *async_map_status;
+
int extbuf;
int linearbuf;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 29a4c9af60..3990e9b057 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -45,6 +45,8 @@
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
#include "iotlb.h"
#include "vhost.h"
@@ -141,6 +143,63 @@ get_blk_size(int fd)
return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
}
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool *dma_map_success, bool do_map)
+{
+	uint64_t host_iova;
+	int ret = 0;
+
+	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	if (do_map) {
+		/* Add mapped region into the default container of DPDK. */
+		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+						 region->host_user_addr,
+						 host_iova,
+						 region->size);
+		*dma_map_success = ret == 0;
+
+		if (ret) {
+			/*
+			 * DMA device may bind with kernel driver, in this case,
+			 * we don't need to program IOMMU manually. However, if no
+			 * device is bound with vfio/uio in DPDK, and vfio kernel
+			 * module is loaded, the API will still be called and return
+			 * with ENODEV/ENOTSUP.
+			 *
+			 * DPDK VFIO only returns ENODEV/ENOTSUP in very similar
+			 * situations (VFIO either unsupported, or supported
+			 * but no devices found). Either way, no mappings could be
+			 * performed. We treat it as normal case in async path.
+			 */
+			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
+				return 0;
+
+			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+			return ret;
+		}
+
+	} else {
+		/* No need to do vfio unmap if the map failed. */
+		if (!*dma_map_success)
+			return 0;
+
+		/* Remove mapped region from the default container of DPDK. */
+		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+						   region->host_user_addr,
+						   host_iova,
+						   region->size);
+		if (ret) {
+			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+			return ret;
+		}
+		/* Clear the flag once the unmap succeeds. */
+		*dma_map_success = false;
+	}
+
+	return ret;
+}
+
static void
free_mem_region(struct virtio_net *dev)
{
@@ -153,6 +212,9 @@ free_mem_region(struct virtio_net *dev)
for (i = 0; i < dev->mem->nregions; i++) {
reg = &dev->mem->regions[i];
if (reg->host_user_addr) {
+ if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+ async_dma_map(reg, &dev->async_map_status[i], false);
+
munmap(reg->mmap_addr, reg->mmap_size);
close(reg->fd);
}
@@ -203,6 +265,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
}
dev->postcopy_listening = 0;
+
+ if (dev->async_map_status) {
+ rte_free(dev->async_map_status);
+ dev->async_map_status = NULL;
+ }
}
static void
@@ -621,6 +688,17 @@ numa_realloc(struct virtio_net *dev, int index)
}
dev->mem = mem;
+ if (dev->async_copy && rte_vfio_is_enabled("vfio")) {
+ dev->async_map_status = rte_zmalloc_socket("async-dma-map-status",
+ sizeof(bool) * dev->mem->nregions, 0, node);
+ if (!dev->async_map_status) {
+ VHOST_LOG_CONFIG(ERR,
+ "(%d) failed to realloc dma mapping status on node\n",
+ dev->vid);
+ return dev;
+ }
+ }
+
gp = rte_realloc_socket(dev->guest_pages, dev->max_guest_pages * sizeof(*gp),
RTE_CACHE_LINE_SIZE, node);
if (!gp) {
@@ -1151,12 +1229,14 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,
static int
vhost_user_mmap_region(struct virtio_net *dev,
struct rte_vhost_mem_region *region,
+ uint32_t region_index,
uint64_t mmap_offset)
{
void *mmap_addr;
uint64_t mmap_size;
uint64_t alignment;
int populate;
+ int ret;
/* Check for memory_size + mmap_offset overflow */
if (mmap_offset >= -region->size) {
@@ -1210,13 +1290,25 @@ vhost_user_mmap_region(struct virtio_net *dev,
region->mmap_size = mmap_size;
region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
- if (dev->async_copy)
+ if (dev->async_copy) {
if (add_guest_pages(dev, region, alignment) < 0) {
VHOST_LOG_CONFIG(ERR,
"adding guest pages to region failed.\n");
return -1;
}
+ if (rte_vfio_is_enabled("vfio")) {
+ ret = async_dma_map(region, &dev->async_map_status[region_index], true);
+ if (ret) {
+ VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA "
+ "engine failed\n");
+ rte_free(dev->async_map_status);
+ dev->async_map_status = NULL;
+ return -1;
+ }
+ }
+ }
+
VHOST_LOG_CONFIG(INFO,
"guest memory region size: 0x%" PRIx64 "\n"
"\t guest physical addr: 0x%" PRIx64 "\n"
@@ -1291,6 +1383,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
dev->mem = NULL;
}
+ if (dev->async_map_status) {
+ rte_free(dev->async_map_status);
+ dev->async_map_status = NULL;
+ }
+
/* Flush IOTLB cache as previous HVAs are now invalid */
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
for (i = 0; i < dev->nr_vring; i++)
@@ -1329,6 +1426,17 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
goto free_guest_pages;
}
+ if (dev->async_copy) {
+ dev->async_map_status = rte_zmalloc_socket("async-dma-map-status",
+ sizeof(bool) * memory->nregions, 0, numa_node);
+ if (!dev->async_map_status) {
+ VHOST_LOG_CONFIG(ERR,
+ "(%d) failed to allocate memory for dma mapping status\n",
+ dev->vid);
+ goto free_guest_pages;
+ }
+ }
+
for (i = 0; i < memory->nregions; i++) {
reg = &dev->mem->regions[i];
@@ -1345,7 +1453,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
mmap_offset = memory->regions[i].mmap_offset;
- if (vhost_user_mmap_region(dev, reg, mmap_offset) < 0) {
+ if (vhost_user_mmap_region(dev, reg, i, mmap_offset) < 0) {
VHOST_LOG_CONFIG(ERR, "Failed to mmap region %u\n", i);
goto free_mem_table;
}
--
2.17.1
next prev parent reply other threads:[~2021-09-25 10:11 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-01 5:30 [dpdk-dev] [PATCH 0/2] *** support IOMMU for DMA device *** Xuan Ding
2021-09-01 5:30 ` [dpdk-dev] [PATCH 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-01 5:30 ` [dpdk-dev] [PATCH 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-17 5:25 ` [dpdk-dev] [PATCH v2 0/2] support IOMMU for DMA device Xuan Ding
2021-09-17 5:25 ` [dpdk-dev] [PATCH v2 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-17 5:25 ` [dpdk-dev] [PATCH v2 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-23 14:39 ` Hu, Jiayu
2021-09-23 14:56 ` Maxime Coquelin
2021-09-24 1:53 ` Xia, Chenbo
2021-09-24 7:13 ` Maxime Coquelin
2021-09-24 7:35 ` Xia, Chenbo
2021-09-24 8:18 ` Ding, Xuan
2021-09-25 10:03 ` [dpdk-dev] [PATCH v3 0/2] support IOMMU for DMA device Xuan Ding
2021-09-25 10:03 ` [dpdk-dev] [PATCH v3 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-25 10:03 ` Xuan Ding [this message]
2021-09-27 4:17 ` [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost Hu, Jiayu
2021-09-27 4:55 ` Ding, Xuan
2021-09-25 10:33 ` [dpdk-dev] [PATCH v4 0/2] support IOMMU for DMA device Xuan Ding
2021-09-25 10:33 ` [dpdk-dev] [PATCH v4 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-25 10:33 ` [dpdk-dev] [PATCH v4 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-27 7:48 ` [dpdk-dev] [PATCH v5 0/2] support IOMMU for DMA device Xuan Ding
2021-09-27 7:48 ` [dpdk-dev] [PATCH v5 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-27 7:48 ` [dpdk-dev] [PATCH v5 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-27 12:13 ` Burakov, Anatoly
2021-09-28 9:03 ` Ding, Xuan
2021-09-29 2:41 ` [dpdk-dev] [PATCH v6 0/2] support IOMMU for DMA device Xuan Ding
2021-09-29 2:41 ` [dpdk-dev] [PATCH v6 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-29 2:41 ` [dpdk-dev] [PATCH v6 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-29 6:12 ` Hu, Jiayu
2021-09-29 9:39 ` Burakov, Anatoly
2021-09-30 5:17 ` Hu, Jiayu
2021-09-30 5:19 ` Hu, Jiayu
2021-10-11 7:59 ` [dpdk-dev] [PATCH v7 0/2] Support IOMMU for DMA device Xuan Ding
2021-10-11 7:59 ` [dpdk-dev] [PATCH v7 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-10-13 6:57 ` Yang, YvonneX
2021-10-21 9:50 ` Maxime Coquelin
2021-10-11 7:59 ` [dpdk-dev] [PATCH v7 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-10-13 6:57 ` Yang, YvonneX
2021-10-21 10:00 ` Maxime Coquelin
2021-10-21 12:33 ` [dpdk-dev] [PATCH v7 0/2] Support IOMMU for DMA device Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210925100358.61995-3-xuan.ding@intel.com \
--to=xuan.ding@intel.com \
--cc=anatoly.burakov@intel.com \
--cc=bruce.richardson@intel.com \
--cc=chenbo.xia@intel.com \
--cc=cheng1.jiang@intel.com \
--cc=dev@dpdk.org \
--cc=jiayu.hu@intel.com \
--cc=maxime.coquelin@redhat.com \
--cc=sunil.pai.g@intel.com \
--cc=yinan.wang@intel.com \
--cc=yvonnex.yang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).