From: Santosh Shukla <sshukla@mvista.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v3 11/12] vfio: Support for no-IOMMU mode
Date: Thu, 7 Jan 2016 22:03:08 +0530 [thread overview]
Message-ID: <1452184390-5994-12-git-send-email-sshukla@mvista.com> (raw)
In-Reply-To: <1452184390-5994-1-git-send-email-sshukla@mvista.com>
From: Anatoly Burakov <anatoly.burakov@intel.com>
This commit is adding a generic mechanism to support multiple IOMMU
types. For now, it's only type 1 (x86 IOMMU) and no-IOMMU (a special
VFIO mode that doesn't use IOMMU at all), but it's easily extended
by adding necessary definitions into eal_pci_init.h and a DMA
mapping function to eal_pci_vfio_dma.c.
Since type 1 IOMMU module is no longer necessary to have VFIO,
we fix the module check to check for vfio-pci instead. It's not
ideal and triggers VFIO checks more often (and thus produces more
error output, which was the reason behind the module check in the
first place), so we compensate for that by providing more verbose
logging, indicating whether VFIO initialization has succeeded or
failed.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
lib/librte_eal/linuxapp/eal/Makefile | 1 +
lib/librte_eal/linuxapp/eal/eal_pci_init.h | 22 ++++
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 142 +++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c | 84 ++++++++++++++
lib/librte_eal/linuxapp/eal/eal_vfio.h | 5 +
5 files changed, 201 insertions(+), 53 deletions(-)
create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 26eced5..5c9e9d9 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -59,6 +59,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_uio.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio_dma.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio_mp_sync.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index 3bc592b..068800c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -112,6 +112,28 @@ struct vfio_config {
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
};
+/* function pointer typedef for DMA mapping functions */
+typedef int (*vfio_dma_func_t)(int);
+
+/* Structure to hold supported IOMMU types */
+struct vfio_iommu_type {
+ int type_id;
+ const char *name;
+ vfio_dma_func_t dma_map_func;
+};
+
+/* function prototypes for different IOMMU types */
+int vfio_iommu_type1_dma_map(int container_fd);
+int vfio_iommu_noiommu_dma_map(int container_fd);
+
+/* IOMMU types we support */
+static const struct vfio_iommu_type iommu_types[] = {
+ /* x86 IOMMU, otherwise known as type 1 */
+ { VFIO_TYPE1_IOMMU, "Type 1", &vfio_iommu_type1_dma_map},
+ /* IOMMU-less mode */
+ { VFIO_NOIOMMU_IOMMU, "No-IOMMU", &vfio_iommu_noiommu_dma_map},
+};
+
#endif
#endif /* EAL_PCI_INIT_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 7ec203c..c69050d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -72,6 +72,7 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq)
#define VFIO_DIR "/dev/vfio"
#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
/* per-process VFIO config */
@@ -236,42 +237,57 @@ pci_vfio_set_bus_master(int dev_fd)
return 0;
}
-/* set up DMA mappings */
-static int
-pci_vfio_setup_dma_maps(int vfio_container_fd)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- int i, ret;
-
- ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
- VFIO_TYPE1_IOMMU);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set IOMMU type, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+static const struct vfio_iommu_type *
+pci_vfio_set_iommu_type(int vfio_container_fd) {
+ for (unsigned idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = &iommu_types[idx];
+
+ int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+ t->type_id);
+ if (!ret) {
+ RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+ t->type_id, t->name);
+ return t;
+ }
+ /* not an error, there may be more supported IOMMU types */
+ RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "
+ "error %i (%s)\n", t->type_id, t->name, errno,
+ strerror(errno));
}
+ /* if we didn't find a suitable IOMMU type, fail */
+ return NULL;
+}
- /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- struct vfio_iommu_type1_dma_map dma_map;
-
- if (ms[i].addr == NULL)
- break;
-
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms[i].addr_64;
- dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
-
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+/* check if we have any supported extensions */
+static int
+pci_vfio_has_supported_extensions(int vfio_container_fd) {
+ int ret;
+ unsigned idx, n_extensions = 0;
+ for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = &iommu_types[idx];
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
- "error %i (%s)\n", errno, strerror(errno));
+ ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
+ t->type_id);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " could not get IOMMU type, "
+ "error %i (%s)\n", errno,
+ strerror(errno));
+ close(vfio_container_fd);
return -1;
+ } else if (ret == 1) {
+ /* we found a supported extension */
+ n_extensions++;
}
+ RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",
+ t->type_id, t->name,
+ ret ? "supported" : "not supported");
+ }
+
+ /* if we didn't find any supported IOMMU types, fail */
+ if (!n_extensions) {
+ close(vfio_container_fd);
+ return -1;
}
return 0;
@@ -400,17 +416,10 @@ pci_vfio_get_container_fd(void)
return -1;
}
- /* check if we support IOMMU type 1 */
- ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
- if (ret != 1) {
- if (ret < 0)
- RTE_LOG(ERR, EAL, " could not get IOMMU type, "
- "error %i (%s)\n", errno,
- strerror(errno));
- else
- RTE_LOG(ERR, EAL, " unsupported IOMMU type "
- "detected in VFIO\n");
- close(vfio_container_fd);
+ ret = pci_vfio_has_supported_extensions(vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " no supported IOMMU "
+ "extensions found!\n");
return -1;
}
@@ -460,6 +469,7 @@ pci_vfio_get_group_fd(int iommu_group_no)
/* if primary, try to open the group */
if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* try regular group format */
snprintf(filename, sizeof(filename),
VFIO_GROUP_FMT, iommu_group_no);
vfio_group_fd = open(filename, O_RDWR);
@@ -470,7 +480,20 @@ pci_vfio_get_group_fd(int iommu_group_no)
strerror(errno));
return -1;
}
- return 0;
+
+ /* special case: try no-IOMMU path as well */
+ snprintf(filename, sizeof(filename),
+ VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+ /* noiommu group found */
}
/* if the fd is valid, create a new group for it */
@@ -688,14 +711,21 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
}
/*
- * set up DMA mappings for container
+ * pick an IOMMU type and set up DMA mappings for container
*
* needs to be done only once, only when at least one group is assigned to
* a container and only in primary process
*/
if (internal_config.process_type == RTE_PROC_PRIMARY &&
vfio_cfg.vfio_container_has_dma == 0) {
- ret = pci_vfio_setup_dma_maps(vfio_cfg.vfio_container_fd);
+ /* select an IOMMU type which we will be using */
+ const struct vfio_iommu_type *t =
+ pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+ if (!t) {
+ RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", pci_addr);
+ return -1;
+ }
+ ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
if (ret) {
RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
"error %i (%s)\n", pci_addr, errno, strerror(errno));
@@ -935,35 +965,41 @@ pci_vfio_enable(void)
{
/* initialize group list */
int i;
- int module_vfio_type1;
+ int vfio_available;
for (i = 0; i < VFIO_MAX_GROUPS; i++) {
vfio_cfg.vfio_groups[i].fd = -1;
vfio_cfg.vfio_groups[i].group_no = -1;
}
- module_vfio_type1 = rte_eal_check_module("vfio_iommu_type1");
+ /* inform the user that we are probing for VFIO */
+ RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
+
+ /* check if vfio-pci module is loaded */
+ vfio_available = rte_eal_check_module("vfio_pci");
/* return error directly */
- if (module_vfio_type1 == -1) {
+ if (vfio_available == -1) {
RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
return -1;
}
/* return 0 if VFIO modules not loaded */
- if (module_vfio_type1 == 0) {
- RTE_LOG(INFO, EAL, "VFIO modules not all loaded, "
- "skip VFIO support...\n");
+ if (vfio_available == 0) {
+ RTE_LOG(INFO, EAL, "VFIO modules not loaded, "
+ "skipping VFIO support...\n");
return 0;
}
vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd();
/* check if we have VFIO driver enabled */
- if (vfio_cfg.vfio_container_fd != -1)
+ if (vfio_cfg.vfio_container_fd != -1) {
+ RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
vfio_cfg.vfio_enabled = 1;
- else
+ } else {
RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
+ }
return 0;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c
new file mode 100644
index 0000000..50d3563
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c
@@ -0,0 +1,84 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+
+#include "eal_pci_init.h"
+
+#ifdef VFIO_PRESENT
+
+int
+vfio_iommu_type1_dma_map(int vfio_container_fd)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ int i, ret;
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ struct vfio_iommu_type1_dma_map dma_map;
+
+ if (ms[i].addr == NULL)
+ break;
+
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = ms[i].addr_64;
+ dma_map.size = ms[i].len;
+ dma_map.iova = ms[i].phys_addr;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+vfio_iommu_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 72ec3f6..638ee31 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -52,6 +52,11 @@
#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE
#endif
+/* older kernels may not have no-IOMMU mode */
+#ifndef VFIO_NOIOMMU_IOMMU
+#define VFIO_NOIOMMU_IOMMU 8
+#endif
+
#define VFIO_PRESENT
#endif /* kernel version */
#endif /* RTE_EAL_VFIO */
--
1.7.9.5
next prev parent reply other threads:[~2016-01-07 16:34 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-07 16:32 [dpdk-dev] [PATCH v3 00/12] Add virtio support for arm/arm64 Santosh Shukla
2016-01-07 16:32 ` [dpdk-dev] [PATCH v3 01/12] virtio: Introduce config RTE_VIRTIO_INC_VECTOR Santosh Shukla
2016-01-07 16:32 ` [dpdk-dev] [PATCH v3 02/12] config: i686: set RTE_VIRTIO_INC_VECTOR=n Santosh Shukla
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 03/12] linuxapp: eal: arm: Always return 0 for rte_eal_iopl_init() Santosh Shukla
2016-01-07 18:14 ` Stephen Hemminger
2016-01-09 13:18 ` Santosh Shukla
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 04/12] linuxapp/vfio: ignore mapping for ioport region Santosh Shukla
2016-01-07 18:16 ` Stephen Hemminger
2016-01-07 18:53 ` Santosh Shukla
2016-01-08 7:29 ` Yuanhan Liu
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 05/12] virtio_pci.h: build fix for sys/io.h for non-x86 arch Santosh Shukla
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 06/12] eal: pci: vfio: add rd/wr func for pci bar space Santosh Shukla
2016-01-07 18:19 ` Stephen Hemminger
2016-01-07 18:26 ` Wiles, Keith
2016-01-07 18:39 ` Stephen Hemminger
2016-01-07 18:48 ` Santosh Shukla
2016-01-07 18:46 ` Santosh Shukla
2016-01-13 14:47 ` Bruce Richardson
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 07/12] virtio: vfio: add api support to rd/wr ioport bar Santosh Shukla
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 08/12] virtio: Add capability to initialize driver for vfio interface Santosh Shukla
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 09/12] virtio: vfio: Enable RTE_PCI_DRV_NEED_MAPPING flag in driver Santosh Shukla
2016-01-07 18:20 ` Stephen Hemminger
2016-01-09 12:38 ` Santosh Shukla
2016-01-12 7:14 ` Yuanhan Liu
2016-01-13 12:18 ` Santosh Shukla
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 10/12] config: armv7/v8: Enable RTE_LIBRTE_VIRTIO_PMD Santosh Shukla
2016-01-07 16:33 ` Santosh Shukla [this message]
2016-01-07 16:33 ` [dpdk-dev] [PATCH v3 12/12] eal: pci: vfio: fix build error Santosh Shukla
2016-01-07 17:02 ` Burakov, Anatoly
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1452184390-5994-12-git-send-email-sshukla@mvista.com \
--to=sshukla@mvista.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).