From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 721911041; Fri, 12 Jan 2018 04:56:08 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 11 Jan 2018 19:56:07 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,347,1511856000"; d="scan'208";a="9537318" Received: from fmsmsx103.amr.corp.intel.com ([10.18.124.201]) by fmsmga002.fm.intel.com with ESMTP; 11 Jan 2018 19:56:06 -0800 Received: from fmsmsx151.amr.corp.intel.com (10.18.125.4) by FMSMSX103.amr.corp.intel.com (10.18.124.201) with Microsoft SMTP Server (TLS) id 14.3.319.2; Thu, 11 Jan 2018 19:56:06 -0800 Received: from shsmsx151.ccr.corp.intel.com (10.239.6.50) by FMSMSX151.amr.corp.intel.com (10.18.125.4) with Microsoft SMTP Server (TLS) id 14.3.319.2; Thu, 11 Jan 2018 19:56:06 -0800 Received: from shsmsx103.ccr.corp.intel.com ([169.254.4.213]) by SHSMSX151.ccr.corp.intel.com ([169.254.3.218]) with mapi id 14.03.0319.002; Fri, 12 Jan 2018 11:56:04 +0800 From: "Zhang, Qi Z" To: Maxime Coquelin , "dev@dpdk.org" , "stable@dpdk.org" , "Tan, Jianfeng" , "santosh.shukla@caviumnetworks.com" , "Burakov, Anatoly" , "thomas@monjalon.net" , "stephen@networkplumber.org" CC: "peterx@redhat.com" Thread-Topic: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Thread-Index: AQHTiUxeTBCfb9PJFUqtSQDMcTNKvqNvnQzw Date: Fri, 12 Jan 2018 03:56:03 +0000 Message-ID: <039ED4275CED7440929022BC67E706115312BA47@SHSMSX103.ccr.corp.intel.com> References: <20180109131801.26520-1-maxime.coquelin@redhat.com> In-Reply-To: <20180109131801.26520-1-maxime.coquelin@redhat.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: dlp-product: dlpe-windows dlp-version: 11.0.0.116 dlp-reaction: no-action x-originating-ip: 
[10.239.127.40] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 12 Jan 2018 03:56:09 -0000 > -----Original Message----- > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Maxime Coquelin > Sent: Tuesday, January 9, 2018 9:18 PM > To: dev@dpdk.org; stable@dpdk.org; Tan, Jianfeng = ; > santosh.shukla@caviumnetworks.com; Burakov, Anatoly > ; thomas@monjalon.net; > stephen@networkplumber.org > Cc: peterx@redhat.com; Maxime Coquelin > Subject: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU > address width too small >=20 > Intel VT-d supports different address widths for the IOVAs, from > 39 bits to 56 bits. >=20 > While recent processors support at least 48 bits, VT-d emulation currentl= y > only supports 39 bits. It makes DMA mapping to fail in this case when usi= ng > VA as IOVA mode, as user-space virtual addresses uses up to 47 bits (see > kernel's Documentation/x86/x86_64/mm.txt). >=20 > This patch parses VT-d CAP register value available in sysfs, and forbid = VA as > IOVA mode if the GAW is 39 bits or unknown. 
>=20 > Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs") >=20 > Cc: stable@dpdk.org > Signed-off-by: Maxime Coquelin > --- >=20 > Changes in v2: > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D > - Rework pci_one_device_iommu_support_va #ifdefery (Stephen) > - Don't inline introduced functions (Stephen) >=20 > drivers/bus/pci/linux/pci.c | 108 > ++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 99 insertions(+), 9 deletions(-) >=20 > diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c in= dex > 25f907e04..0a43c4b89 100644 > --- a/drivers/bus/pci/linux/pci.c > +++ b/drivers/bus/pci/linux/pci.c > @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void) > return 0; > } >=20 > +#if defined(RTE_ARCH_X86) > +static bool > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { > +#define VTD_CAP_SAGAW_SHIFT 8 > +#define VTD_CAP_SAGAW_MASK (0x1fULL << > VTD_CAP_SAGAW_SHIFT) > +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt > */ > + struct rte_pci_addr *addr =3D &dev->addr; > + char filename[PATH_MAX]; > + FILE *fp; > + uint64_t sagaw, vtd_cap_reg =3D 0; > + int guest_addr_width =3D 0; > + > + snprintf(filename, sizeof(filename), > + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap", > + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, > + addr->function); > + if (access(filename, F_OK) =3D=3D -1) { > + /* We don't have an Intel IOMMU, assume VA supported*/ > + return true; > + } > + > + /* We have an intel IOMMU */ > + fp =3D fopen(filename, "r"); > + if (fp =3D=3D NULL) { > + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename); > + return false; > + } > + > + if (fscanf(fp, "%lx", &vtd_cap_reg) !=3D 1) { > + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename); > + fclose(fp); > + return false; > + } > + > + fclose(fp); > + > + sagaw =3D (vtd_cap_reg & VTD_CAP_SAGAW_MASK) >> > VTD_CAP_SAGAW_SHIFT; Based on previous testing, sagaw is not the maximum VA address width. Below should be the
correct cap decode, taken from the kernel driver's include/linux/intel-iommu.h: #define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) Regards Qi > + > + switch (sagaw) { > + case 2: > + guest_addr_width =3D 39; > + break; > + case 4: > + guest_addr_width =3D 48; > + break; > + case 6: > + guest_addr_width =3D 56; > + break; > + default: > + RTE_LOG(ERR, EAL, "Unkwown Intel IOMMU SAGAW value (%lx)\n", > + sagaw); > + break; > + } > + > + if (guest_addr_width < X86_VA_WIDTH) > + return false; > + > + return true; > +} > +#elif defined(RTE_ARCH_PPC_64) > +static bool > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { > + return false; > +} > +#else > +static bool > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { > + return true; > +} > +#endif > + > +/* > + * All devices IOMMUs support VA as IOVA */ static bool > +pci_devices_iommu_support_va(void) > +{ > + struct rte_pci_device *dev =3D NULL; > + struct rte_pci_driver *drv =3D NULL; > + > + FOREACH_DRIVER_ON_PCIBUS(drv) { > + FOREACH_DEVICE_ON_PCIBUS(dev) { > + if (!rte_pci_match(drv, dev)) > + continue; > + if (!pci_one_device_iommu_support_va(dev)) > + return false; > + } > + } > + return true; > +} > + > /* > * Get iommu class of PCI devices on the bus. > */ > @@ -557,12 +651,7 @@ rte_pci_get_iommu_class(void) > bool is_vfio_noiommu_enabled =3D true; > bool has_iova_va; > bool is_bound_uio; > - bool spapr_iommu =3D > -#if defined(RTE_ARCH_PPC_64) > - true; > -#else > - false; > -#endif > + bool iommu_no_va; >=20 > is_bound =3D pci_one_device_is_bound(); > if (!is_bound) > @@ -570,13 +659,14 @@ rte_pci_get_iommu_class(void) >=20 > has_iova_va =3D pci_one_device_has_iova_va(); > is_bound_uio =3D pci_one_device_bound_uio(); > + iommu_no_va =3D !pci_devices_iommu_support_va(); > #ifdef VFIO_PRESENT > is_vfio_noiommu_enabled =3D rte_vfio_noiommu_is_enabled() =3D=3D true ? 
> true : false; > #endif >=20 > if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled && > - !spapr_iommu) > + !iommu_no_va) > return RTE_IOVA_VA; >=20 > if (has_iova_va) { > @@ -585,8 +675,8 @@ rte_pci_get_iommu_class(void) > RTE_LOG(WARNING, EAL, "vfio-noiommu mode > configured\n"); > if (is_bound_uio) > RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); > - if (spapr_iommu) > - RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support > IOVA as VA\n"); > + if (iommu_no_va) > + RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as > VA\n"); > } >=20 > return RTE_IOVA_PA; > -- > 2.14.3