From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: xuan.ding@intel.com, chenbo.xia@intel.com
Cc: dev@dpdk.org, jiayu.hu@intel.com, yuanx.wang@intel.com
Subject: Re: [PATCH v2 2/2] vhost: fix physical address mapping
Date: Tue, 1 Feb 2022 09:51:22 +0100
Message-ID: <e14749dd-2f0e-8681-2353-6ecd7ea17718@redhat.com>
In-Reply-To: <20220119151016.9970-3-xuan.ding@intel.com>
On 1/19/22 16:10, xuan.ding@intel.com wrote:
> From: Xuan Ding <xuan.ding@intel.com>
>
> When IOVA-as-PA mode is used, the IOVAs of a region are likely to be
> discontiguous, which requires page-by-page mapping for DMA devices. For
> consistency, this patch implements page-by-page mapping instead of
> region-granularity mapping for both IOVA-as-VA and IOVA-as-PA modes.
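For context on why region-level mapping breaks: under IOVA-as-VA a
region's IOVA range is contiguous by construction, but under IOVA-as-PA
each page may sit at an unrelated physical address, so the VFIO mapping
has to be built page by page. A minimal sketch of the idea, not the
patch's exact code (page_size and the region fields are assumed to be
set up as elsewhere in vhost_user.c; error handling omitted):

	uint64_t va = region->host_user_addr;
	uint64_t left = region->size;

	while (left > 0) {
		uint64_t len = RTE_MIN(page_size, left);
		/* resolve the IOVA per page; under IOVA-as-PA it is the PA */
		rte_iova_t iova = rte_mem_virt2iova((void *)(uintptr_t)va);

		rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
				va, (uint64_t)iova, len);
		va += len;
		left -= len;
	}

The patch gets the same effect by iterating the guest page array, which
add_guest_pages() already fills in page by page (merging where it can).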
>
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
>
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
> lib/vhost/vhost.h | 1 +
> lib/vhost/vhost_user.c | 116 ++++++++++++++++++++---------------------
> 2 files changed, 57 insertions(+), 60 deletions(-)
>
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index ca7f58039d..9521ae56da 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
> struct guest_page {
> uint64_t guest_phys_addr;
> uint64_t host_iova;
> + uint64_t host_user_addr;
> uint64_t size;
> };
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 95c9df697e..48c08716ba 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -143,57 +143,56 @@ get_blk_size(int fd)
> return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
> }
>
> -static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +static void
> +async_dma_map(struct virtio_net *dev, bool do_map)
> {
> - uint64_t host_iova;
> int ret = 0;
> -
> - host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> + uint32_t i;
> + struct guest_page *page;
Please add a blank line here, between the declarations and the if block.
> if (do_map) {
> - /* Add mapped region into the default container of DPDK. */
> - ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> - region->host_user_addr,
> - host_iova,
> - region->size);
> - if (ret) {
> - /*
> - * DMA device may bind with kernel driver, in this case,
> - * we don't need to program IOMMU manually. However, if no
> - * device is bound with vfio/uio in DPDK, and vfio kernel
> - * module is loaded, the API will still be called and return
> - * with ENODEV/ENOSUP.
> - *
> - * DPDK vfio only returns ENODEV/ENOSUP in very similar
> - * situations(vfio either unsupported, or supported
> - * but no devices found). Either way, no mappings could be
> - * performed. We treat it as normal case in async path.
> - */
> - if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> - return 0;
> -
> - VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> - /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> - return 0;
> + for (i = 0; i < dev->nr_guest_pages; i++) {
> + page = &dev->guest_pages[i];
> + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> + page->host_user_addr,
> + page->host_iova,
> + page->size);
> + if (ret) {
> + /*
> + * DMA device may bind with kernel driver, in this case,
> + * we don't need to program IOMMU manually. However, if no
> + * device is bound with vfio/uio in DPDK, and vfio kernel
> + * module is loaded, the API will still be called and return
> + * with ENODEV.
> + *
> + * DPDK vfio only returns ENODEV in very similar situations
> + * (vfio either unsupported, or supported but no devices found).
> + * Either way, no mappings could be performed. We treat it as
> + * normal case in async path. This is a workaround.
> + */
> + if (rte_errno == ENODEV)
> + return;
> +
> + /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
> + VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> + }
> }
>
> } else {
> - /* Remove mapped region from the default container of DPDK. */
> - ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> - region->host_user_addr,
> - host_iova,
> - region->size);
> - if (ret) {
> - /* like DMA map, ignore the kernel driver case when unmap. */
> - if (rte_errno == EINVAL)
> - return 0;
> -
> - VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> - return ret;
> + for (i = 0; i < dev->nr_guest_pages; i++) {
> + page = &dev->guest_pages[i];
> + ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> + page->host_user_addr,
> + page->host_iova,
> + page->size);
> + if (ret) {
> + /* like DMA map, ignore the kernel driver case when unmap. */
> + if (rte_errno == EINVAL)
> + return;
> +
> + VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> + }
> }
> }
> -
> - return ret;
> }
>
> static void
> @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
> if (!dev || !dev->mem)
> return;
>
> + if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> + async_dma_map(dev, false);
> +
> for (i = 0; i < dev->mem->nregions; i++) {
> reg = &dev->mem->regions[i];
> if (reg->host_user_addr) {
> - if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> - async_dma_map(reg, false);
> -
> munmap(reg->mmap_addr, reg->mmap_size);
> close(reg->fd);
> }
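A detail worth calling out in this hunk: since guest pages are now
tracked and unmapped at device level rather than per region, the unmap
moves out of the per-region loop and runs once, while the region
mappings that back those pages are still in place. In outline
(pseudo-code, simplified):

	free_mem_region():
		if (async_copy && vfio enabled)
			async_dma_map(dev, false);	/* one pass over guest pages */
		for each region:
			munmap(reg->mmap_addr, reg->mmap_size);
			close(reg->fd);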
> @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
>
> static int
> add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> - uint64_t host_iova, uint64_t size)
> + uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
> {
> struct guest_page *page, *last_page;
> struct guest_page *old_pages;
> @@ -999,8 +998,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> if (dev->nr_guest_pages > 0) {
> last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
> /* merge if the two pages are continuous */
> - if (host_iova == last_page->host_iova +
> - last_page->size) {
> + if (host_iova == last_page->host_iova + last_page->size
> + && guest_phys_addr == last_page->guest_phys_addr + last_page->size
> + && host_user_addr == last_page->host_user_addr + last_page->size) {
The indentation of these continuation lines looks wrong; please align them with the rest of the condition.
> last_page->size += size;
> return 0;
> }
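The stricter merge condition deserves a note: two pages can be adjacent
in one address space without being adjacent in the others, and a merged
entry is only valid if all three spaces line up. A made-up example:

	page A: gpa 0x1000  iova 0x5000  host va 0x7f0000001000  size 0x1000
	page B: gpa 0x2000  iova 0x6000  host va 0x7f0000005000  size 0x1000

A and B are contiguous in GPA and IOVA, but not in host VA. Merging them
would create an entry implying host va 0x7f0000002000 maps to iova
0x6000, which is false, so the per-page VFIO map built from the array
would be wrong.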
> @@ -1009,6 +1009,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> page = &dev->guest_pages[dev->nr_guest_pages++];
> page->guest_phys_addr = guest_phys_addr;
> page->host_iova = host_iova;
> + page->host_user_addr = host_user_addr;
> page->size = size;
>
> return 0;
> @@ -1028,7 +1029,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> size = page_size - (guest_phys_addr & (page_size - 1));
> size = RTE_MIN(size, reg_size);
>
> - if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0)
> + if (add_one_guest_page(dev, guest_phys_addr, host_iova,
> + host_user_addr, size) < 0)
> return -1;
>
> host_user_addr += size;
> @@ -1040,7 +1042,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> host_iova = rte_mem_virt2iova((void *)(uintptr_t)
> host_user_addr);
> if (add_one_guest_page(dev, guest_phys_addr, host_iova,
> - size) < 0)
> + host_user_addr, size) < 0)
> return -1;
>
> host_user_addr += size;
> @@ -1215,7 +1217,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> uint64_t mmap_size;
> uint64_t alignment;
> int populate;
> - int ret;
>
> /* Check for memory_size + mmap_offset overflow */
> if (mmap_offset >= -region->size) {
> @@ -1274,14 +1275,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
> return -1;
> }
> -
> - if (rte_vfio_is_enabled("vfio")) {
> - ret = async_dma_map(region, true);
> - if (ret) {
> - VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> - return -1;
> - }
> - }
> }
>
> VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1413,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> dev->mem->nregions++;
> }
>
> + if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> + async_dma_map(dev, true);
> +
> if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
> goto free_mem_table;
>
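With this move, the VHOST_USER_SET_MEM_TABLE flow becomes, in outline
(pseudo-code, simplified):

	vhost_user_set_mem_table():
		for each region:
			vhost_user_mmap_region();	/* mmap + add_guest_pages() */
		async_dma_map(dev, true);	/* one pass over the page array */
		vhost_user_postcopy_register();

i.e. the VFIO mappings are programmed once, after all regions' pages
have been collected and merged, instead of per region inside the mmap
helper.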
Overall, the patch looks good to me.
Please fix the small nits above, and add a Fixes tag and Cc stable on patch 1.
Thanks,
Maxime