From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id D72239402 for ; Wed, 27 Jan 2016 15:24:01 +0100 (CET) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga102.jf.intel.com with ESMTP; 27 Jan 2016 06:24:00 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,354,1449561600"; d="scan'208";a="869836215" Received: from irsmsx106.ger.corp.intel.com ([163.33.3.31]) by orsmga001.jf.intel.com with ESMTP; 27 Jan 2016 06:23:59 -0800 Received: from irsmsx109.ger.corp.intel.com ([169.254.13.100]) by IRSMSX106.ger.corp.intel.com ([169.254.8.197]) with mapi id 14.03.0248.002; Wed, 27 Jan 2016 14:23:57 +0000 From: "Burakov, Anatoly" To: "Burakov, Anatoly" , "dev@dpdk.org" Thread-Topic: [dpdk-dev] [PATCH v3] vfio: Support for no-IOMMU mode Thread-Index: AQHRWQu1HJdUiwD+bkmqfvmUrefC8p8PamkA Date: Wed, 27 Jan 2016 14:23:57 +0000 Message-ID: References: <1452688569-14695-1-git-send-email-anatoly.burakov@intel.com> <1453903474-18807-1-git-send-email-anatoly.burakov@intel.com> In-Reply-To: <1453903474-18807-1-git-send-email-anatoly.burakov@intel.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [163.33.239.181] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH v3] vfio: Support for no-IOMMU mode X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 27 Jan 2016 14:24:02 -0000 Apologies, lost the signoff from Santosh Shukla and also the commit message still mentions the file that is now non-existent, so I'll submit a v4. 
Thanks, Anatoly > -----Original Message----- > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Anatoly Burakov > Sent: Wednesday, January 27, 2016 2:05 PM > To: dev@dpdk.org > Subject: [dpdk-dev] [PATCH v3] vfio: Support for no-IOMMU mode >=20 > This commit is adding a generic mechanism to support multiple IOMMU > types. For now, it's only type 1 (x86 IOMMU) and no-IOMMU (a special VFIO > mode that doesn't use IOMMU at all), but it's easily extended by adding > necessary definitions into eal_pci_init.h and a DMA mapping function to > eal_pci_vfio_dma.c. >=20 > Since type 1 IOMMU module is no longer necessary to have VFIO, we fix the > module check to check for vfio-pci instead. It's not ideal and triggers VFIO > checks more often (and thus produces more error output, which was the > reason behind the module check in the first place), so we compensate for > that by providing more verbose logging, indicating whether VFIO initialization > has succeeded or failed. >=20 > Signed-off-by: Anatoly Burakov > Tested-by: Santosh Shukla > --- > v3 changes: > Merging DMA mapping functions back into eal_pci_vfio.c > Fixing and adding comments >=20 > v2 changes: > Compile fix (hat-tip to Santosh Shukla) > Tested-by is provisional, since only superficial testing was done >=20 > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 205 +++++++++++++++++++++-- > ------ > lib/librte_eal/linuxapp/eal/eal_vfio.h | 5 + > 2 files changed, 157 insertions(+), 53 deletions(-) >=20 > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index 74f91ba..fdf334b 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -72,11 +72,74 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq) > #define VFIO_DIR "/dev/vfio" > #define VFIO_CONTAINER_PATH "/dev/vfio/vfio" > #define VFIO_GROUP_FMT "/dev/vfio/%u" > +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" > #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x 
<< 40ULL) >=20 > /* per-process VFIO config */ > static struct vfio_config vfio_cfg; >=20 > +/* DMA mapping function prototype. > + * Takes VFIO container fd as a parameter. > + * Returns 0 on success, -1 on error. > + * */ > +typedef int (*vfio_dma_func_t)(int); > + > +struct vfio_iommu_type { > + int type_id; > + const char *name; > + vfio_dma_func_t dma_map_func; > +}; > + > +int vfio_iommu_type1_dma_map(int); > +int vfio_iommu_noiommu_dma_map(int); > + > +/* IOMMU types we support */ > +static const struct vfio_iommu_type iommu_types[] =3D { > + /* x86 IOMMU, otherwise known as type 1 */ > + { VFIO_TYPE1_IOMMU, "Type 1", > &vfio_iommu_type1_dma_map}, > + /* IOMMU-less mode */ > + { VFIO_NOIOMMU_IOMMU, "No-IOMMU", > &vfio_iommu_noiommu_dma_map}, }; > + > +int > +vfio_iommu_type1_dma_map(int vfio_container_fd) { > + const struct rte_memseg *ms =3D rte_eal_get_physmem_layout(); > + int i, ret; > + > + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ > + for (i =3D 0; i < RTE_MAX_MEMSEG; i++) { > + struct vfio_iommu_type1_dma_map dma_map; > + > + if (ms[i].addr =3D=3D NULL) > + break; > + > + memset(&dma_map, 0, sizeof(dma_map)); > + dma_map.argsz =3D sizeof(struct > vfio_iommu_type1_dma_map); > + dma_map.vaddr =3D ms[i].addr_64; > + dma_map.size =3D ms[i].len; > + dma_map.iova =3D ms[i].phys_addr; > + dma_map.flags =3D VFIO_DMA_MAP_FLAG_READ | > VFIO_DMA_MAP_FLAG_WRITE; > + > + ret =3D ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, > &dma_map); > + > + if (ret) { > + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, > " > + "error %i (%s)\n", errno, > strerror(errno)); > + return -1; > + } > + } > + > + return 0; > +} > + > +int > +vfio_iommu_noiommu_dma_map(int __rte_unused vfio_container_fd) { > + /* No-IOMMU mode does not need DMA mapping */ > + return 0; > +} > + > int > pci_vfio_read_config(const struct rte_intr_handle *intr_handle, > void *buf, size_t len, off_t offs) @@ -208,42 +271,58 @@ > pci_vfio_set_bus_master(int dev_fd) > return 0; > } 
>=20 > -/* set up DMA mappings */ > -static int > -pci_vfio_setup_dma_maps(int vfio_container_fd) -{ > - const struct rte_memseg *ms =3D rte_eal_get_physmem_layout(); > - int i, ret; > - > - ret =3D ioctl(vfio_container_fd, VFIO_SET_IOMMU, > - VFIO_TYPE1_IOMMU); > - if (ret) { > - RTE_LOG(ERR, EAL, " cannot set IOMMU type, " > - "error %i (%s)\n", errno, strerror(errno)); > - return -1; > +/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for > +error */ static const struct vfio_iommu_type * > +pci_vfio_set_iommu_type(int vfio_container_fd) { > + unsigned idx; > + for (idx =3D 0; idx < RTE_DIM(iommu_types); idx++) { > + const struct vfio_iommu_type *t =3D &iommu_types[idx]; > + > + int ret =3D ioctl(vfio_container_fd, VFIO_SET_IOMMU, > + t->type_id); > + if (!ret) { > + RTE_LOG(NOTICE, EAL, " using IOMMU type %d > (%s)\n", > + t->type_id, t->name); > + return t; > + } > + /* not an error, there may be more supported IOMMU types > */ > + RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, " > + "error %i (%s)\n", t->type_id, t->name, > errno, > + strerror(errno)); > } > + /* if we didn't find a suitable IOMMU type, fail */ > + return NULL; > +} >=20 > - /* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */ > - for (i =3D 0; i < RTE_MAX_MEMSEG; i++) { > - struct vfio_iommu_type1_dma_map dma_map; > - > - if (ms[i].addr =3D=3D NULL) > - break; > - > - memset(&dma_map, 0, sizeof(dma_map)); > - dma_map.argsz =3D sizeof(struct > vfio_iommu_type1_dma_map); > - dma_map.vaddr =3D ms[i].addr_64; > - dma_map.size =3D ms[i].len; > - dma_map.iova =3D ms[i].phys_addr; > - dma_map.flags =3D VFIO_DMA_MAP_FLAG_READ | > VFIO_DMA_MAP_FLAG_WRITE; > - > - ret =3D ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, > &dma_map); > +/* check if we have any supported extensions */ static int > +pci_vfio_has_supported_extensions(int vfio_container_fd) { > + int ret; > + unsigned idx, n_extensions =3D 0; > + for (idx =3D 0; idx < RTE_DIM(iommu_types); idx++) { > + const struct vfio_iommu_type *t =3D &iommu_types[idx]; >=20 > - if (ret) { > - RTE_LOG(ERR, EAL, " cannot set up DMA remapping, > " > - "error %i (%s)\n", errno, > strerror(errno)); > + ret =3D ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, > + t->type_id); > + if (ret < 0) { > + RTE_LOG(ERR, EAL, " could not get IOMMU type, " > + "error %i (%s)\n", errno, > + strerror(errno)); > + close(vfio_container_fd); > return -1; > + } else if (ret =3D=3D 1) { > + /* we found a supported extension */ > + n_extensions++; > } > + RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n", > + t->type_id, t->name, > + ret ? 
"supported" : "not supported"); > + } > + > + /* if we didn't find any supported IOMMU types, fail */ > + if (!n_extensions) { > + close(vfio_container_fd); > + return -1; > } >=20 > return 0; > @@ -372,17 +451,10 @@ pci_vfio_get_container_fd(void) > return -1; > } >=20 > - /* check if we support IOMMU type 1 */ > - ret =3D ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, > VFIO_TYPE1_IOMMU); > - if (ret !=3D 1) { > - if (ret < 0) > - RTE_LOG(ERR, EAL, " could not get IOMMU > type, " > - "error %i (%s)\n", errno, > - strerror(errno)); > - else > - RTE_LOG(ERR, EAL, " unsupported IOMMU > type " > - "detected in VFIO\n"); > - close(vfio_container_fd); > + ret =3D > pci_vfio_has_supported_extensions(vfio_container_fd); > + if (ret) { > + RTE_LOG(ERR, EAL, " no supported IOMMU " > + "extensions found!\n"); > return -1; > } >=20 > @@ -432,6 +504,7 @@ pci_vfio_get_group_fd(int iommu_group_no) >=20 > /* if primary, try to open the group */ > if (internal_config.process_type =3D=3D RTE_PROC_PRIMARY) { > + /* try regular group format */ > snprintf(filename, sizeof(filename), > VFIO_GROUP_FMT, iommu_group_no); > vfio_group_fd =3D open(filename, O_RDWR); @@ -442,7 > +515,20 @@ pci_vfio_get_group_fd(int iommu_group_no) > strerror(errno)); > return -1; > } > - return 0; > + > + /* special case: try no-IOMMU path as well */ > + snprintf(filename, sizeof(filename), > + VFIO_NOIOMMU_GROUP_FMT, > iommu_group_no); > + vfio_group_fd =3D open(filename, O_RDWR); > + if (vfio_group_fd < 0) { > + if (errno !=3D ENOENT) { > + RTE_LOG(ERR, EAL, "Cannot open %s: > %s\n", filename, > + strerror(errno)); > + return -1; > + } > + return 0; > + } > + /* noiommu group found */ > } >=20 > /* if the fd is valid, create a new group for it */ @@ -660,14 > +746,21 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > } >=20 > /* > - * set up DMA mappings for container > + * pick an IOMMU type and set up DMA mappings for container > * > * needs to be done only once, only when at least one group is > 
assigned to > * a container and only in primary process > */ > if (internal_config.process_type =3D=3D RTE_PROC_PRIMARY && > vfio_cfg.vfio_container_has_dma =3D=3D 0) { > - ret =3D > pci_vfio_setup_dma_maps(vfio_cfg.vfio_container_fd); > + /* select an IOMMU type which we will be using */ > + const struct vfio_iommu_type *t =3D > + > pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd); > + if (!t) { > + RTE_LOG(ERR, EAL, " %s failed to select IOMMU > type\n", pci_addr); > + return -1; > + } > + ret =3D t->dma_map_func(vfio_cfg.vfio_container_fd); > if (ret) { > RTE_LOG(ERR, EAL, " %s DMA remapping failed, " > "error %i (%s)\n", pci_addr, errno, > strerror(errno)); @@ -887,35 +980,41 @@ pci_vfio_enable(void) { > /* initialize group list */ > int i; > - int module_vfio_type1; > + int vfio_available; >=20 > for (i =3D 0; i < VFIO_MAX_GROUPS; i++) { > vfio_cfg.vfio_groups[i].fd =3D -1; > vfio_cfg.vfio_groups[i].group_no =3D -1; > } >=20 > - module_vfio_type1 =3D rte_eal_check_module("vfio_iommu_type1"); > + /* inform the user that we are probing for VFIO */ > + RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); > + > + /* check if vfio-pci module is loaded */ > + vfio_available =3D rte_eal_check_module("vfio_pci"); >=20 > /* return error directly */ > - if (module_vfio_type1 =3D=3D -1) { > + if (vfio_available =3D=3D -1) { > RTE_LOG(INFO, EAL, "Could not get loaded module > details!\n"); > return -1; > } >=20 > /* return 0 if VFIO modules not loaded */ > - if (module_vfio_type1 =3D=3D 0) { > - RTE_LOG(INFO, EAL, "VFIO modules not all loaded, " > - "skip VFIO support...\n"); > + if (vfio_available =3D=3D 0) { > + RTE_LOG(INFO, EAL, "VFIO modules not loaded, " > + "skipping VFIO support...\n"); > return 0; > } >=20 > vfio_cfg.vfio_container_fd =3D pci_vfio_get_container_fd(); >=20 > /* check if we have VFIO driver enabled */ > - if (vfio_cfg.vfio_container_fd !=3D -1) > + if (vfio_cfg.vfio_container_fd !=3D -1) { > + RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); > 
vfio_cfg.vfio_enabled =3D 1; > - else > + } else { > RTE_LOG(NOTICE, EAL, "VFIO support could not be > initialized\n"); > + } >=20 > return 0; > } > diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h > b/lib/librte_eal/linuxapp/eal/eal_vfio.h > index 72ec3f6..638ee31 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h > +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h > @@ -52,6 +52,11 @@ > #define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE #endif >=20 > +/* older kernels may not have no-IOMMU mode */ #ifndef > +VFIO_NOIOMMU_IOMMU #define VFIO_NOIOMMU_IOMMU 8 #endif > + > #define VFIO_PRESENT > #endif /* kernel version */ > #endif /* RTE_EAL_VFIO */ > -- > 2.5.0