DPDK patches and discussions
* [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
@ 2017-05-17 14:44 Pawel Wodkowski
  2017-05-17 17:20 ` Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Pawel Wodkowski @ 2017-05-17 14:44 UTC (permalink / raw)
  To: dev; +Cc: Pawel Wodkowski

Currently it is not possible to use memory that is not owned by DPDK to
perform DMA. This scenario occurs in vhost applications (like SPDK)
where the guest sends its own memory table. To fill this gap, provide an
API to allow registering arbitrary addresses in a VFIO container.
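
A minimal usage sketch (illustrative only; the helper name and the way
'va', 'iova' and 'len' are obtained, e.g. from a guest memory table, are
placeholders, not part of this patch):

#include <stdint.h>

#include <rte_iommu.h>

/*
 * Illustrative sketch only: 'va', 'iova' and 'len' describe memory the
 * application owns (e.g. a region from a guest memory table), not
 * memory allocated by DPDK.
 */
static int
dma_map_external_region(void *va, uint64_t iova, uint64_t len)
{
	/* expose the externally owned region to the IOMMU for DMA */
	if (rte_iommu_dma_map((uint64_t)(uintptr_t)va, iova, len) < 0)
		return -1;

	/* ... device DMA to/from [iova, iova + len) happens here ... */

	/* remove the mapping once the device is done with the region */
	return rte_iommu_dma_unmap((uint64_t)(uintptr_t)va, iova, len);
}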

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
 lib/librte_eal/linuxapp/eal/Makefile            |   3 +
 lib/librte_eal/linuxapp/eal/eal_vfio.c          | 127 ++++++++++++++++++++----
 lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
 lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  76 ++++++++++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   7 ++
 5 files changed, 206 insertions(+), 17 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 640afd0887de..f0d8ae6ab4a3 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \
+	include/rte_iommu.h
+
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
 
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 53ac725d22e0..549c9824fdd7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -39,6 +39,7 @@
 #include <rte_log.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
+#include <rte_iommu.h>
 
 #include "eal_filesystem.h"
 #include "eal_vfio.h"
@@ -50,17 +51,19 @@
 static struct vfio_config vfio_cfg;
 
 static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 static int vfio_spapr_dma_map(int);
 static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 
 /* IOMMU types we support */
 static const struct vfio_iommu_type iommu_types[] = {
 	/* x86 IOMMU, otherwise known as type 1 */
-	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map, &vfio_type1_dma_mem_map},
 	/* ppc64 IOMMU, otherwise known as spapr */
-	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map, NULL},
 	/* IOMMU-less mode */
-	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map, &vfio_noiommu_dma_mem_map},
 };
 
 int
@@ -378,6 +381,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				clear_group(vfio_group_fd);
 				return -1;
 			}
+
+			vfio_cfg.vfio_iommu_type = t;
 		}
 	}
 
@@ -690,33 +695,61 @@ vfio_get_group_no(const char *sysfs_base,
 }
 
 static int
-vfio_type1_dma_map(int vfio_container_fd)
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
 {
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	int i, ret;
-
-	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		struct vfio_iommu_type1_dma_map dma_map;
-
-		if (ms[i].addr == NULL)
-			break;
+	struct vfio_iommu_type1_dma_map dma_map;
+	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	int ret;
 
+	if (do_map != 0) {
 		memset(&dma_map, 0, sizeof(dma_map));
 		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
-		dma_map.vaddr = ms[i].addr_64;
-		dma_map.size = ms[i].len;
-		dma_map.iova = ms[i].phys_addr;
+		dma_map.vaddr = vaddr;
+		dma_map.size = len;
+		dma_map.iova = iova;
 		dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
 		if (ret) {
 			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
 					  "error %i (%s)\n", errno,
 					  strerror(errno));
 			return -1;
 		}
+
+	} else {
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = len;
+		dma_unmap.iova = iova;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot clear DMA remapping, "
+					  "error %i (%s)\n", errno,
+					  strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int i;
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (ms[i].addr == NULL)
+			break;
+
+		if (vfio_type1_dma_mem_map(vfio_container_fd, ms[i].addr_64,
+					   ms[i].phys_addr, ms[i].len, 1))
+			return 1;
 	}
 
 	return 0;
@@ -816,4 +849,64 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
 	return 0;
 }
 
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+			 uint64_t __rte_unused vaddr,
+			 uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			 int __rte_unused do_map)
+{
+	/* No-IOMMU mode does not need DMA mapping */
+	return 0;
+}
+
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
+{
+	const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+
+	if (!t) {
+		RTE_LOG(ERR, EAL, "  VFIO support not initialized\n");
+		return -1;
+	}
+
+	if (!t->dma_user_map_func) {
+		RTE_LOG(ERR, EAL,
+			"  VFIO custom DMA region mapping not supported by IOMMU %s\n",
+			t->name);
+		return -1;
+	}
+
+	return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+				    len, do_map);
+}
+
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 1);
+}
+
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 0);
+}
+
+#else
+
+int
+rte_iommu_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+		  __rte_unused uint64_t len)
+{
+	return 0;
+}
+
+int
+rte_iommu_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+		    __rte_unused uint64_t len)
+{
+	return 0;
+}
+
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7579..b1d7dd6496df 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -141,6 +141,7 @@ struct vfio_config {
 	int vfio_enabled;
 	int vfio_container_fd;
 	int vfio_active_groups;
+	const struct vfio_iommu_type *vfio_iommu_type;
 	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
 };
 
@@ -157,10 +158,19 @@ struct vfio_config {
  * */
 typedef int (*vfio_dma_func_t)(int);
 
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, physical address, length and
+ * operation type (0 to unmap, 1 to map) as parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+				   uint64_t len, int do_map);
+
 struct vfio_iommu_type {
 	int type_id;
 	const char *name;
 	vfio_dma_func_t dma_map_func;
+	vfio_dma_user_func_t dma_user_map_func;
 };
 
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
diff --git a/lib/librte_eal/linuxapp/eal/include/rte_iommu.h b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
new file mode 100644
index 000000000000..4036db12dd96
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IOMMU_H_
+#define _RTE_IOMMU_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Register memory region in current IOMMU to enable DMA.
+ *
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+/**
+ * Unregister previously registered memory region.
+ *
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IOMMU_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3a5ed6..e22dc37d6f46 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -198,3 +198,10 @@ DPDK_17.05 {
 	vfio_get_group_no;
 
 } DPDK_17.02;
+
+DPDK_17.08 {
+	global:
+
+	rte_iommu_dma_map;
+	rte_iommu_dma_unmap
+} DPDK_17.05
-- 
2.7.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
  2017-05-17 14:44 [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions Pawel Wodkowski
@ 2017-05-17 17:20 ` Stephen Hemminger
  2017-05-18  9:06   ` Wodkowski, PawelX
  2017-05-18 11:23 ` Burakov, Anatoly
  2017-05-23 13:53 ` [dpdk-dev] [PATCH v2] " Pawel Wodkowski
  2 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2017-05-17 17:20 UTC (permalink / raw)
  To: Pawel Wodkowski; +Cc: dev

On Wed, 17 May 2017 16:44:46 +0200
Pawel Wodkowski <pawelx.wodkowski@intel.com> wrote:

>  /* IOMMU types we support */
>  static const struct vfio_iommu_type iommu_types[] = {
>  	/* x86 IOMMU, otherwise known as type 1 */
> -	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
> +	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map, &vfio_type1_dma_mem_map},
>  	/* ppc64 IOMMU, otherwise known as spapr */
> -	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
> +	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map, NULL},
>  	/* IOMMU-less mode */
> -	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
> +	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map, &vfio_noiommu_dma_mem_map},
>  };

For complex tables like this why not use C99 style initializer.
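
(For illustration only, a standalone sketch of the two styles using
generic stand-in types, not the actual DPDK definitions; the v2 patch
below applies this style to iommu_types.)

/* Generic stand-in struct, not the real vfio_iommu_type definition. */
struct ops {
	int id;
	const char *name;
	int (*map)(int fd);
};

static int map_a(int fd) { (void)fd; return 0; }

/* positional: readers must remember what each field position means */
static const struct ops table_positional[] = {
	{ 1, "A", map_a },
};

/* C99 designated: every field is named; omitted fields become 0/NULL */
static const struct ops table_designated[] = {
	{
		.id = 1,
		.name = "A",
		.map = map_a,
	},
};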

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
  2017-05-17 17:20 ` Stephen Hemminger
@ 2017-05-18  9:06   ` Wodkowski, PawelX
  0 siblings, 0 replies; 12+ messages in thread
From: Wodkowski, PawelX @ 2017-05-18  9:06 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, May 17, 2017 7:20 PM
> To: Wodkowski, PawelX <pawelx.wodkowski@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
> 
> On Wed, 17 May 2017 16:44:46 +0200
> Pawel Wodkowski <pawelx.wodkowski@intel.com> wrote:
> 
> >  /* IOMMU types we support */
> >  static const struct vfio_iommu_type iommu_types[] = {
> >  	/* x86 IOMMU, otherwise known as type 1 */
> > -	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
> > +	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map,
> &vfio_type1_dma_mem_map},
> >  	/* ppc64 IOMMU, otherwise known as spapr */
> > -	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
> > +	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map, NULL},
> >  	/* IOMMU-less mode */
> > -	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
> > +	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map,
> &vfio_noiommu_dma_mem_map},
> >  };
> 
> For complex tables like this why not use C99 style initializer.

Sure, will change it in the next version.
Any comments about the functional side of this change before v2? :)

Pawel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
  2017-05-17 14:44 [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions Pawel Wodkowski
  2017-05-17 17:20 ` Stephen Hemminger
@ 2017-05-18 11:23 ` Burakov, Anatoly
  2017-05-23 13:53 ` [dpdk-dev] [PATCH v2] " Pawel Wodkowski
  2 siblings, 0 replies; 12+ messages in thread
From: Burakov, Anatoly @ 2017-05-18 11:23 UTC (permalink / raw)
  To: Wodkowski, PawelX, dev; +Cc: Wodkowski, PawelX

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pawel Wodkowski
> Sent: Wednesday, May 17, 2017 3:45 PM
> To: dev@dpdk.org
> Cc: Wodkowski, PawelX <pawelx.wodkowski@intel.com>
> Subject: [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
> 
> Currently it is not possible to use memory that is not owned by DPDK to
> perform DMA. This scenario occurs in vhost applications (like SPDK)
> where the guest sends its own memory table. To fill this gap, provide an
> API to allow registering arbitrary addresses in a VFIO container.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---

Seems to make sense to me. Presumably, such locally mapped memory would not work in multiprocess,
so, obvious as it may be, this should probably be documented.

Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [dpdk-dev] [PATCH v2] vfio: allow to map other memory regions
  2017-05-17 14:44 [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions Pawel Wodkowski
  2017-05-17 17:20 ` Stephen Hemminger
  2017-05-18 11:23 ` Burakov, Anatoly
@ 2017-05-23 13:53 ` Pawel Wodkowski
  2017-05-23 13:59   ` [dpdk-dev] [PATCH v3] " Pawel Wodkowski
  2 siblings, 1 reply; 12+ messages in thread
From: Pawel Wodkowski @ 2017-05-23 13:53 UTC (permalink / raw)
  To: dev; +Cc: Pawel Wodkowski

Currently it is not possible to use memory that is not owned by DPDK to
perform DMA. This scenario occurs in vhost applications (like SPDK)
where the guest sends its own memory table. To fill this gap, provide an
API to allow registering arbitrary addresses in a VFIO container.

Change-Id: Ic1f56e850cfdaa48eec02a8ee400e4a66f32892a
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
Changes in v2:
 - Fix syntax error in version map file
 - Add note for multiprocess
 - Change iommu_types to use C99 designated initializers 
---
 lib/librte_eal/linuxapp/eal/Makefile            |   3 +
 lib/librte_eal/linuxapp/eal/eal_vfio.c          | 142 +++++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
 lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  78 +++++++++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   8 ++
 5 files changed, 224 insertions(+), 17 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 640afd0887de..f0d8ae6ab4a3 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \
+	include/rte_iommu.h
+
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
 
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 53ac725d22e0..4e6cc4265a97 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -39,6 +39,7 @@
 #include <rte_log.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
+#include <rte_iommu.h>
 
 #include "eal_filesystem.h"
 #include "eal_vfio.h"
@@ -50,17 +51,34 @@
 static struct vfio_config vfio_cfg;
 
 static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 static int vfio_spapr_dma_map(int);
 static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 
 /* IOMMU types we support */
 static const struct vfio_iommu_type iommu_types[] = {
 	/* x86 IOMMU, otherwise known as type 1 */
-	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+	{
+		.type_id = RTE_VFIO_TYPE1,
+		.name = "Type 1",
+		.dma_map_func = &vfio_type1_dma_map,
+		.dma_user_map_func = &vfio_type1_dma_mem_map
+	},
 	/* ppc64 IOMMU, otherwise known as spapr */
-	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+	{
+		.type_id = RTE_VFIO_SPAPR,
+		.name = "sPAPR",
+		.dma_map_func = &vfio_spapr_dma_map,
+		.dma_user_map_func = NULL
+	},
 	/* IOMMU-less mode */
-	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+	{
+		.type_id = RTE_VFIO_NOIOMMU,
+		.name = "No-IOMMU",
+		.dma_map_func = &vfio_noiommu_dma_map,
+		.dma_user_map_func = &vfio_noiommu_dma_mem_map
+	},
 };
 
 int
@@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				clear_group(vfio_group_fd);
 				return -1;
 			}
+
+			vfio_cfg.vfio_iommu_type = t;
 		}
 	}
 
@@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base,
 }
 
 static int
-vfio_type1_dma_map(int vfio_container_fd)
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
 {
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	int i, ret;
-
-	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		struct vfio_iommu_type1_dma_map dma_map;
-
-		if (ms[i].addr == NULL)
-			break;
+	struct vfio_iommu_type1_dma_map dma_map;
+	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	int ret;
 
+	if (do_map != 0) {
 		memset(&dma_map, 0, sizeof(dma_map));
 		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
-		dma_map.vaddr = ms[i].addr_64;
-		dma_map.size = ms[i].len;
-		dma_map.iova = ms[i].phys_addr;
+		dma_map.vaddr = vaddr;
+		dma_map.size = len;
+		dma_map.iova = iova;
 		dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
 		if (ret) {
 			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
 					  "error %i (%s)\n", errno,
 					  strerror(errno));
 			return -1;
 		}
+
+	} else {
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = len;
+		dma_unmap.iova = iova;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot clear DMA remapping, "
+					  "error %i (%s)\n", errno,
+					  strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int i;
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (ms[i].addr == NULL)
+			break;
+
+		if (vfio_type1_dma_mem_map(vfio_container_fd, ms[i].addr_64,
+					   ms[i].phys_addr, ms[i].len, 1))
+			return 1;
 	}
 
 	return 0;
@@ -816,4 +864,64 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
 	return 0;
 }
 
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+			 uint64_t __rte_unused vaddr,
+			 uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			 int __rte_unused do_map)
+{
+	/* No-IOMMU mode does not need DMA mapping */
+	return 0;
+}
+
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
+{
+	const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+
+	if (!t) {
+		RTE_LOG(ERR, EAL, "  VFIO support not initialized\n");
+		return -1;
+	}
+
+	if (!t->dma_user_map_func) {
+		RTE_LOG(ERR, EAL,
+			"  VFIO custom DMA region mapping not supported by IOMMU %s\n",
+			t->name);
+		return -1;
+	}
+
+	return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+				    len, do_map);
+}
+
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 1);
+}
+
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 0);
+}
+
+#else
+
+int
+rte_iommu_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+		  __rte_unused uint64_t len)
+{
+	return 0;
+}
+
+int
+rte_iommu_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+		    __rte_unused uint64_t len)
+{
+	return 0;
+}
+
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7579..b1d7dd6496df 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -141,6 +141,7 @@ struct vfio_config {
 	int vfio_enabled;
 	int vfio_container_fd;
 	int vfio_active_groups;
+	const struct vfio_iommu_type *vfio_iommu_type;
 	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
 };
 
@@ -157,10 +158,19 @@ struct vfio_config {
  * */
 typedef int (*vfio_dma_func_t)(int);
 
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, physical address, length and
+ * operation type (0 to unmap, 1 to map) as parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+				   uint64_t len, int do_map);
+
 struct vfio_iommu_type {
 	int type_id;
 	const char *name;
 	vfio_dma_func_t dma_map_func;
+	vfio_dma_user_func_t dma_user_map_func;
 };
 
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
diff --git a/lib/librte_eal/linuxapp/eal/include/rte_iommu.h b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
new file mode 100644
index 000000000000..ccdea153f7b6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
@@ -0,0 +1,78 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IOMMU_H_
+#define _RTE_IOMMU_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Register memory region in current IOMMU to enable DMA.
+ *
+ * @note
+ *   Registered memory is not shared in a multiprocess environment.
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+/**
+ * Unregister previously registered memory region.
+ *
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IOMMU_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3a5ed6..7d9781e311a9 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -198,3 +198,11 @@ DPDK_17.05 {
 	vfio_get_group_no;
 
 } DPDK_17.02;
+
+DPDK_17.08 {
+	global:
+
+	rte_iommu_dma_map;
+	rte_iommu_dma_unmap;
+
+} DPDK_17.05
-- 
2.7.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [dpdk-dev] [PATCH v3] vfio: allow to map other memory regions
  2017-05-23 13:53 ` [dpdk-dev] [PATCH v2] " Pawel Wodkowski
@ 2017-05-23 13:59   ` Pawel Wodkowski
  2017-05-24 11:17     ` [dpdk-dev] [PATCH] " Pawel Wodkowski
  0 siblings, 1 reply; 12+ messages in thread
From: Pawel Wodkowski @ 2017-05-23 13:59 UTC (permalink / raw)
  To: dev; +Cc: Pawel Wodkowski

Currently it is not possible to use memory that is not owned by DPDK to
perform DMA. This scenario occurs in vhost applications (like SPDK)
where the guest sends its own memory table. To fill this gap, provide an
API to allow registering arbitrary addresses in a VFIO container.

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
Changes in v3:
 - Removed Gerrit Change-Id

Changes in v2:
 - Fix syntax error in version map file
 - Add note for multiprocess
 - Change iommu_types to use C99 designated initializers 
---
 lib/librte_eal/linuxapp/eal/Makefile            |   3 +
 lib/librte_eal/linuxapp/eal/eal_vfio.c          | 142 +++++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
 lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  78 +++++++++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   8 ++
 5 files changed, 224 insertions(+), 17 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 640afd0887de..f0d8ae6ab4a3 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \
+	include/rte_iommu.h
+
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
 
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 53ac725d22e0..4e6cc4265a97 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -39,6 +39,7 @@
 #include <rte_log.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
+#include <rte_iommu.h>
 
 #include "eal_filesystem.h"
 #include "eal_vfio.h"
@@ -50,17 +51,34 @@
 static struct vfio_config vfio_cfg;
 
 static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 static int vfio_spapr_dma_map(int);
 static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 
 /* IOMMU types we support */
 static const struct vfio_iommu_type iommu_types[] = {
 	/* x86 IOMMU, otherwise known as type 1 */
-	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+	{
+		.type_id = RTE_VFIO_TYPE1,
+		.name = "Type 1",
+		.dma_map_func = &vfio_type1_dma_map,
+		.dma_user_map_func = &vfio_type1_dma_mem_map
+	},
 	/* ppc64 IOMMU, otherwise known as spapr */
-	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+	{
+		.type_id = RTE_VFIO_SPAPR,
+		.name = "sPAPR",
+		.dma_map_func = &vfio_spapr_dma_map,
+		.dma_user_map_func = NULL
+	},
 	/* IOMMU-less mode */
-	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+	{
+		.type_id = RTE_VFIO_NOIOMMU,
+		.name = "No-IOMMU",
+		.dma_map_func = &vfio_noiommu_dma_map,
+		.dma_user_map_func = &vfio_noiommu_dma_mem_map
+	},
 };
 
 int
@@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				clear_group(vfio_group_fd);
 				return -1;
 			}
+
+			vfio_cfg.vfio_iommu_type = t;
 		}
 	}
 
@@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base,
 }
 
 static int
-vfio_type1_dma_map(int vfio_container_fd)
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
 {
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	int i, ret;
-
-	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		struct vfio_iommu_type1_dma_map dma_map;
-
-		if (ms[i].addr == NULL)
-			break;
+	struct vfio_iommu_type1_dma_map dma_map;
+	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	int ret;
 
+	if (do_map != 0) {
 		memset(&dma_map, 0, sizeof(dma_map));
 		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
-		dma_map.vaddr = ms[i].addr_64;
-		dma_map.size = ms[i].len;
-		dma_map.iova = ms[i].phys_addr;
+		dma_map.vaddr = vaddr;
+		dma_map.size = len;
+		dma_map.iova = iova;
 		dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
 		if (ret) {
 			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
 					  "error %i (%s)\n", errno,
 					  strerror(errno));
 			return -1;
 		}
+
+	} else {
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = len;
+		dma_unmap.iova = iova;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot clear DMA remapping, "
+					  "error %i (%s)\n", errno,
+					  strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int i;
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (ms[i].addr == NULL)
+			break;
+
+		if (vfio_type1_dma_mem_map(vfio_container_fd, ms[i].addr_64,
+					   ms[i].phys_addr, ms[i].len, 1))
+			return 1;
 	}
 
 	return 0;
@@ -816,4 +864,64 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
 	return 0;
 }
 
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+			 uint64_t __rte_unused vaddr,
+			 uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			 int __rte_unused do_map)
+{
+	/* No-IOMMU mode does not need DMA mapping */
+	return 0;
+}
+
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
+{
+	const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+
+	if (!t) {
+		RTE_LOG(ERR, EAL, "  VFIO support not initialized\n");
+		return -1;
+	}
+
+	if (!t->dma_user_map_func) {
+		RTE_LOG(ERR, EAL,
+			"  VFIO custom DMA region mapping not supported by IOMMU %s\n",
+			t->name);
+		return -1;
+	}
+
+	return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+				    len, do_map);
+}
+
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 1);
+}
+
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 0);
+}
+
+#else
+
+int
+rte_iommu_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+		  __rte_unused uint64_t len)
+{
+	return 0;
+}
+
+int
+rte_iommu_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+		    __rte_unused uint64_t len)
+{
+	return 0;
+}
+
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7579..b1d7dd6496df 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -141,6 +141,7 @@ struct vfio_config {
 	int vfio_enabled;
 	int vfio_container_fd;
 	int vfio_active_groups;
+	const struct vfio_iommu_type *vfio_iommu_type;
 	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
 };
 
@@ -157,10 +158,19 @@ struct vfio_config {
  * */
 typedef int (*vfio_dma_func_t)(int);
 
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, physical address, length and
+ * operation type (0 to unmap, 1 to map) as parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+				   uint64_t len, int do_map);
+
 struct vfio_iommu_type {
 	int type_id;
 	const char *name;
 	vfio_dma_func_t dma_map_func;
+	vfio_dma_user_func_t dma_user_map_func;
 };
 
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
diff --git a/lib/librte_eal/linuxapp/eal/include/rte_iommu.h b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
new file mode 100644
index 000000000000..ccdea153f7b6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
@@ -0,0 +1,78 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IOMMU_H_
+#define _RTE_IOMMU_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Register memory region in current IOMMU to enable DMA.
+ *
+ * @note
+ *   Registered memory is not shared in a multiprocess environment.
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+/**
+ * Unregister previously registered memory region.
+ *
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IOMMU_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3a5ed6..7d9781e311a9 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -198,3 +198,11 @@ DPDK_17.05 {
 	vfio_get_group_no;
 
 } DPDK_17.02;
+
+DPDK_17.08 {
+	global:
+
+	rte_iommu_dma_map;
+	rte_iommu_dma_unmap;
+
+} DPDK_17.05
-- 
2.7.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [dpdk-dev] [PATCH] vfio: allow to map other memory regions
  2017-05-23 13:59   ` [dpdk-dev] [PATCH v3] " Pawel Wodkowski
@ 2017-05-24 11:17     ` Pawel Wodkowski
       [not found]       ` <1496663643-65002-1-git-send-email-pawelx.wodkowski@intel.com>
                         ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Pawel Wodkowski @ 2017-05-24 11:17 UTC (permalink / raw)
  To: dev; +Cc: Pawel Wodkowski

Currently it is not possible to use memory that is not owned by DPDK to
perform DMA. This scenario occurs in vhost applications (like SPDK)
where the guest sends its own memory table. To fill this gap, provide an
API to allow registering arbitrary addresses in a VFIO container.

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
Changes in v4:
 - Fix another syntax error in version map file

Changes in v3:
 - Removed Gerrit Change-Id

Changes in v2:
 - Fix syntax error in version map file
 - Add note for multiprocess
 - Change iommu_types to use C99 designated initializers 
---
 lib/librte_eal/linuxapp/eal/Makefile            |   3 +
 lib/librte_eal/linuxapp/eal/eal_vfio.c          | 142 +++++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
 lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  78 +++++++++++++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   8 ++
 5 files changed, 224 insertions(+), 17 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 640afd0887de..f0d8ae6ab4a3 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \
+	include/rte_iommu.h
+
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
 
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 53ac725d22e0..4e6cc4265a97 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -39,6 +39,7 @@
 #include <rte_log.h>
 #include <rte_memory.h>
 #include <rte_eal_memconfig.h>
+#include <rte_iommu.h>
 
 #include "eal_filesystem.h"
 #include "eal_vfio.h"
@@ -50,17 +51,34 @@
 static struct vfio_config vfio_cfg;
 
 static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 static int vfio_spapr_dma_map(int);
 static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 
 /* IOMMU types we support */
 static const struct vfio_iommu_type iommu_types[] = {
 	/* x86 IOMMU, otherwise known as type 1 */
-	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+	{
+		.type_id = RTE_VFIO_TYPE1,
+		.name = "Type 1",
+		.dma_map_func = &vfio_type1_dma_map,
+		.dma_user_map_func = &vfio_type1_dma_mem_map
+	},
 	/* ppc64 IOMMU, otherwise known as spapr */
-	{ RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+	{
+		.type_id = RTE_VFIO_SPAPR,
+		.name = "sPAPR",
+		.dma_map_func = &vfio_spapr_dma_map,
+		.dma_user_map_func = NULL
+	},
 	/* IOMMU-less mode */
-	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+	{
+		.type_id = RTE_VFIO_NOIOMMU,
+		.name = "No-IOMMU",
+		.dma_map_func = &vfio_noiommu_dma_map,
+		.dma_user_map_func = &vfio_noiommu_dma_mem_map
+	},
 };
 
 int
@@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				clear_group(vfio_group_fd);
 				return -1;
 			}
+
+			vfio_cfg.vfio_iommu_type = t;
 		}
 	}
 
@@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base,
 }
 
 static int
-vfio_type1_dma_map(int vfio_container_fd)
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
 {
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	int i, ret;
-
-	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
-	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-		struct vfio_iommu_type1_dma_map dma_map;
-
-		if (ms[i].addr == NULL)
-			break;
+	struct vfio_iommu_type1_dma_map dma_map;
+	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	int ret;
 
+	if (do_map != 0) {
 		memset(&dma_map, 0, sizeof(dma_map));
 		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
-		dma_map.vaddr = ms[i].addr_64;
-		dma_map.size = ms[i].len;
-		dma_map.iova = ms[i].phys_addr;
+		dma_map.vaddr = vaddr;
+		dma_map.size = len;
+		dma_map.iova = iova;
 		dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
 		if (ret) {
 			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
 					  "error %i (%s)\n", errno,
 					  strerror(errno));
 			return -1;
 		}
+
+	} else {
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = len;
+		dma_unmap.iova = iova;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot clear DMA remapping, "
+					  "error %i (%s)\n", errno,
+					  strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int i;
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (ms[i].addr == NULL)
+			break;
+
+		if (vfio_type1_dma_mem_map(vfio_container_fd, ms[i].addr_64,
+					   ms[i].phys_addr, ms[i].len, 1))
+			return 1;
 	}
 
 	return 0;
@@ -816,4 +864,64 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
 	return 0;
 }
 
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+			 uint64_t __rte_unused vaddr,
+			 uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			 int __rte_unused do_map)
+{
+	/* No-IOMMU mode does not need DMA mapping */
+	return 0;
+}
+
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova,
+		       uint64_t len, int do_map)
+{
+	const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+
+	if (!t) {
+		RTE_LOG(ERR, EAL, "  VFIO support not initialized\n");
+		return -1;
+	}
+
+	if (!t->dma_user_map_func) {
+		RTE_LOG(ERR, EAL,
+			"  VFIO custom DMA region mapping not supported by IOMMU %s\n",
+			t->name);
+		return -1;
+	}
+
+	return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+				    len, do_map);
+}
+
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 1);
+}
+
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	return vfio_dma_mem_map(vaddr, iova, len, 0);
+}
+
+#else
+
+int
+rte_iommu_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+		  __rte_unused uint64_t len)
+{
+	return 0;
+}
+
+int
+rte_iommu_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+		    __rte_unused uint64_t len)
+{
+	return 0;
+}
+
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7579..b1d7dd6496df 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -141,6 +141,7 @@ struct vfio_config {
 	int vfio_enabled;
 	int vfio_container_fd;
 	int vfio_active_groups;
+	const struct vfio_iommu_type *vfio_iommu_type;
 	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
 };
 
@@ -157,10 +158,19 @@ struct vfio_config {
  * */
 typedef int (*vfio_dma_func_t)(int);
 
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, physical address, length and
+ * operation type (0 to unmap, 1 to map) as parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+				   uint64_t len, int do_map);
+
 struct vfio_iommu_type {
 	int type_id;
 	const char *name;
 	vfio_dma_func_t dma_map_func;
+	vfio_dma_user_func_t dma_user_map_func;
 };
 
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
diff --git a/lib/librte_eal/linuxapp/eal/include/rte_iommu.h b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
new file mode 100644
index 000000000000..ccdea153f7b6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/rte_iommu.h
@@ -0,0 +1,78 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IOMMU_H_
+#define _RTE_IOMMU_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Register memory region in current IOMMU to enable DMA.
+ *
+ * @note
+ *   Registered memory is not shared in a multiprocess environment.
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+/**
+ * Unregister previously registered memory region.
+ *
+ * @param vaddr
+ *   Start of process virtual address.
+ * @param iova
+ *   Start of IO virtual address.
+ * @param len
+ *   Length of memory region.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IOMMU_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3a5ed6..2689ed59652b 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -198,3 +198,11 @@ DPDK_17.05 {
 	vfio_get_group_no;
 
 } DPDK_17.02;
+
+DPDK_17.08 {
+	global:
+
+	rte_iommu_dma_map;
+	rte_iommu_dma_unmap;
+
+} DPDK_17.05;
-- 
2.7.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
       [not found]       ` <1496663643-65002-1-git-send-email-pawelx.wodkowski@intel.com>
@ 2017-06-05  8:16         ` Wodkowski, PawelX
  0 siblings, 0 replies; 12+ messages in thread
From: Wodkowski, PawelX @ 2017-06-05  8:16 UTC (permalink / raw)
  To: dev; +Cc: Burakov, Anatoly

> -----Original Message-----
> From: Wodkowski, PawelX
> Sent: Monday, June 05, 2017 1:54 PM
> To: Wodkowski, PawelX <pawelx.wodkowski@intel.com>
> Subject: [PATCH] vfio: allow to map other memory regions
> 
> Currently it is not possible to use memory that is not owned by DPDK to
> perform DMA. This scenario occurs in vhost applications (like SPDK)
> where the guest sends its own memory table. To fill this gap, provide an
> API to allow registering arbitrary addresses in a VFIO container.
> 
> Change-Id: Ic1f56e850cfdaa48eec02a8ee400e4a66f32892a
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>

Any thoughts about final version?

Thanks
Pawel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
  2017-05-24 11:17     ` [dpdk-dev] [PATCH] " Pawel Wodkowski
       [not found]       ` <1496663643-65002-1-git-send-email-pawelx.wodkowski@intel.com>
@ 2017-06-13  9:02       ` Burakov, Anatoly
  2017-06-19 21:04       ` Thomas Monjalon
  2 siblings, 0 replies; 12+ messages in thread
From: Burakov, Anatoly @ 2017-06-13  9:02 UTC (permalink / raw)
  To: Wodkowski, PawelX, dev; +Cc: Wodkowski, PawelX

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pawel Wodkowski
> Sent: Wednesday, May 24, 2017 12:18 PM
> To: dev@dpdk.org
> Cc: Wodkowski, PawelX <pawelx.wodkowski@intel.com>
> Subject: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
> 
> Currently it is not possible to use memory that is not owned by DPDK to
> perform DMA. This scenario occurs in vhost applications (like SPDK)
> where the guest sends its own memory table. To fill this gap, provide an
> API to allow registering arbitrary addresses in a VFIO container.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
  2017-05-24 11:17     ` [dpdk-dev] [PATCH] " Pawel Wodkowski
       [not found]       ` <1496663643-65002-1-git-send-email-pawelx.wodkowski@intel.com>
  2017-06-13  9:02       ` Burakov, Anatoly
@ 2017-06-19 21:04       ` Thomas Monjalon
  2017-06-28  9:54         ` Wodkowski, PawelX
  2 siblings, 1 reply; 12+ messages in thread
From: Thomas Monjalon @ 2017-06-19 21:04 UTC (permalink / raw)
  To: Pawel Wodkowski; +Cc: dev

Hi,
Some comments below

24/05/2017 13:17, Pawel Wodkowski:
> Currently it is not possible to use memory that is not owned by DPDK to
> perform DMA. This scenario occurs in vhost applications (like SPDK)
> where the guest sends its own memory table. To fill this gap, provide an
> API to allow registering arbitrary addresses in a VFIO container.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---
>  lib/librte_eal/linuxapp/eal/Makefile            |   3 +
>  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 142 +++++++++++++++++++++---
>  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
>  lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  78 +++++++++++++
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   8 ++
>  5 files changed, 224 insertions(+), 17 deletions(-)
>  create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h

VFIO is not referenced in the doxygen of these functions.
Could we use this API for something else than VFIO?

Any API should be declared in common directory, even if it is not
implemented for FreeBSD (returning -ENOTSUP).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
  2017-06-19 21:04       ` Thomas Monjalon
@ 2017-06-28  9:54         ` Wodkowski, PawelX
  2017-06-28 11:50           ` Thomas Monjalon
  0 siblings, 1 reply; 12+ messages in thread
From: Wodkowski, PawelX @ 2017-06-28  9:54 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Monday, June 19, 2017 11:04 PM
> To: Wodkowski, PawelX <pawelx.wodkowski@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
> 
> Hi,
> Some comments below
> 
> 24/05/2017 13:17, Pawel Wodkowski:
> > Currently it is not possible to use memory that is not owned by DPDK to
> > perform DMA. This scenario occurs in vhost applications (like SPDK)
> > where the guest sends its own memory table. To fill this gap, provide an
> > API to allow registering arbitrary addresses in a VFIO container.
> >
> > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > ---
> >  lib/librte_eal/linuxapp/eal/Makefile            |   3 +
> >  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 142
> +++++++++++++++++++++---
> >  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
> >  lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  78 +++++++++++++
> >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   8 ++
> >  5 files changed, 224 insertions(+), 17 deletions(-)
> >  create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h
> 
> VFIO is not referenced in the doxygen of these functions.
> Could we use this API for something else than VFIO?

This is for any IOMMU hw/module/driver used in the host which requires special
care about memory regions used for DMA. It is not restricted to VFIO, even though
only VFIO is implemented.

> 
> Any API should be declared in common directory, even if it is not
> implemented for FreeBSD (returning -ENOTSUP).

I think those functions should be a NOP for FreeBSD (like RTE_VFIO_NOIOMMU does)
or be conditionally compiled/included (like it is now). I decided to take the second way.
Do you think that I should move rte_iommu.h to the common directory and use #ifdef
there?

Pawel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
  2017-06-28  9:54         ` Wodkowski, PawelX
@ 2017-06-28 11:50           ` Thomas Monjalon
  0 siblings, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2017-06-28 11:50 UTC (permalink / raw)
  To: Wodkowski, PawelX; +Cc: dev

28/06/2017 11:54, Wodkowski, PawelX:
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > Sent: Monday, June 19, 2017 11:04 PM
> > To: Wodkowski, PawelX <pawelx.wodkowski@intel.com>
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] vfio: allow to map other memory regions
> > 
> > Hi,
> > Some comments below
> > 
> > 24/05/2017 13:17, Pawel Wodkowski:
> > > Currently it is not possible to use memory that is not owned by DPDK to
> > > perform DMA. This scenario occurs in vhost applications (like SPDK)
> > > where the guest sends its own memory table. To fill this gap, provide an
> > > API to allow registering arbitrary addresses in a VFIO container.
> > >
> > > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > > ---
> > >  lib/librte_eal/linuxapp/eal/Makefile            |   3 +
> > >  lib/librte_eal/linuxapp/eal/eal_vfio.c          | 142
> > +++++++++++++++++++++---
> > >  lib/librte_eal/linuxapp/eal/eal_vfio.h          |  10 ++
> > >  lib/librte_eal/linuxapp/eal/include/rte_iommu.h |  78 +++++++++++++
> > >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   8 ++
> > >  5 files changed, 224 insertions(+), 17 deletions(-)
> > >  create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h
> > 
> > VFIO is not referenced in the doxygen of these functions.
> > Could we use this API for something else than VFIO?
> 
> This is for any IOMMU hw/module/driver used in the host which requires special
> care about memory regions used for DMA. It is not restricted to VFIO, even though
> only VFIO is implemented.
> 
> > 
> > Any API should be declared in common directory, even if it is not
> > implemented for FreeBSD (returning -ENOTSUP).
> 
> I think those functions should be a NOP for FreeBSD (like RTE_VFIO_NOIOMMU does)
> or be conditionally compiled/included (like it is now). I decided to take the second way.
> Do you think that I should move rte_iommu.h to the common directory and use #ifdef
> there?

No #ifdef please.
You must define the new functions in a common header and implement them
for Linux and BSD. The BSD functions can simply return an error.
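
A possible shape for that (hypothetical file and path, not part of any
posted patch): keep the rte_iommu.h prototypes in the common include
directory, keep the VFIO path for Linux, and let the FreeBSD counterpart
simply report the operation as unsupported, matching the -ENOTSUP
suggestion earlier in this thread:

/* hypothetical lib/librte_eal/bsdapp/eal/eal_iommu.c */
#include <errno.h>
#include <stdint.h>

#include <rte_common.h>
#include <rte_iommu.h>

int
rte_iommu_dma_map(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
		  uint64_t __rte_unused len)
{
	/* no user DMA mapping support on FreeBSD */
	return -ENOTSUP;
}

int
rte_iommu_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
		    uint64_t __rte_unused len)
{
	return -ENOTSUP;
}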

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-06-28 11:50 UTC | newest]

Thread overview: 12+ messages
-- links below jump to the message on this page --
2017-05-17 14:44 [dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions Pawel Wodkowski
2017-05-17 17:20 ` Stephen Hemminger
2017-05-18  9:06   ` Wodkowski, PawelX
2017-05-18 11:23 ` Burakov, Anatoly
2017-05-23 13:53 ` [dpdk-dev] [PATCH v2] " Pawel Wodkowski
2017-05-23 13:59   ` [dpdk-dev] [PATCH v3] " Pawel Wodkowski
2017-05-24 11:17     ` [dpdk-dev] [PATCH] " Pawel Wodkowski
     [not found]       ` <1496663643-65002-1-git-send-email-pawelx.wodkowski@intel.com>
2017-06-05  8:16         ` Wodkowski, PawelX
2017-06-13  9:02       ` Burakov, Anatoly
2017-06-19 21:04       ` Thomas Monjalon
2017-06-28  9:54         ` Wodkowski, PawelX
2017-06-28 11:50           ` Thomas Monjalon
