From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 4E6DB1D9E; Fri, 12 Jan 2018 09:21:30 +0100 (CET) Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.14]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 7532D25BBE; Fri, 12 Jan 2018 08:21:28 +0000 (UTC) Received: from [10.36.112.28] (ovpn-112-28.ams2.redhat.com [10.36.112.28]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 5AA8075545; Fri, 12 Jan 2018 08:21:23 +0000 (UTC) To: "Zhang, Qi Z" , "dev@dpdk.org" , "stable@dpdk.org" , "Tan, Jianfeng" , "santosh.shukla@caviumnetworks.com" , "Burakov, Anatoly" , "thomas@monjalon.net" , "stephen@networkplumber.org" Cc: "peterx@redhat.com" References: <20180109131801.26520-1-maxime.coquelin@redhat.com> <039ED4275CED7440929022BC67E706115312BA47@SHSMSX103.ccr.corp.intel.com> From: Maxime Coquelin Message-ID: <00daa33f-deba-d9d5-750c-b06d49143506@redhat.com> Date: Fri, 12 Jan 2018 09:21:21 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.5.2 MIME-Version: 1.0 In-Reply-To: <039ED4275CED7440929022BC67E706115312BA47@SHSMSX103.ccr.corp.intel.com> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit X-Scanned-By: MIMEDefang 2.79 on 10.5.11.14 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 12 Jan 2018 08:21:28 +0000 (UTC) Subject: Re: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 12 Jan 2018 08:21:30 -0000 On 01/12/2018 04:56 AM, Zhang, Qi Z wrote: > > >> 
-----Original Message----- >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Maxime Coquelin >> Sent: Tuesday, January 9, 2018 9:18 PM >> To: dev@dpdk.org; stable@dpdk.org; Tan, Jianfeng ; >> santosh.shukla@caviumnetworks.com; Burakov, Anatoly >> ; thomas@monjalon.net; >> stephen@networkplumber.org >> Cc: peterx@redhat.com; Maxime Coquelin >> Subject: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU >> address width too small >> >> Intel VT-d supports different address widths for the IOVAs, from >> 39 bits to 56 bits. >> >> While recent processors support at least 48 bits, VT-d emulation currently >> only supports 39 bits. It makes DMA mapping to fail in this case when using >> VA as IOVA mode, as user-space virtual addresses uses up to 47 bits (see >> kernel's Documentation/x86/x86_64/mm.txt). >> >> This patch parses VT-d CAP register value available in sysfs, and forbid VA as >> IOVA mode if the GAW is 39 bits or unknown. >> >> Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs") >> >> Cc: stable@dpdk.org >> Signed-off-by: Maxime Coquelin >> --- >> >> Changes in v2: >> ============== >> - Rework pci_one_device_iommu_support_va #ifdefery (Stephen) >> - Don't inline introduced functions (Stephen) >> >> drivers/bus/pci/linux/pci.c | 108 >> ++++++++++++++++++++++++++++++++++++++++---- >> 1 file changed, 99 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index >> 25f907e04..0a43c4b89 100644 >> --- a/drivers/bus/pci/linux/pci.c >> +++ b/drivers/bus/pci/linux/pci.c >> @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void) >> return 0; >> } >> >> +#if defined(RTE_ARCH_X86) >> +static bool >> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { >> +#define VTD_CAP_SAGAW_SHIFT 8 >> +#define VTD_CAP_SAGAW_MASK (0x1fULL << >> VTD_CAP_SAGAW_SHIFT) >> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt >> */ >> + struct rte_pci_addr *addr = &dev->addr; >> + 
char filename[PATH_MAX]; >> + FILE *fp; >> + uint64_t sagaw, vtd_cap_reg = 0; >> + int guest_addr_width = 0; >> + >> + snprintf(filename, sizeof(filename), >> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap", >> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, >> + addr->function); >> + if (access(filename, F_OK) == -1) { >> + /* We don't have an Intel IOMMU, assume VA supported*/ >> + return true; >> + } >> + >> + /* We have an intel IOMMU */ >> + fp = fopen(filename, "r"); >> + if (fp == NULL) { >> + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename); >> + return false; >> + } >> + >> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) { >> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename); >> + fclose(fp); >> + return false; >> + } >> + >> + fclose(fp); >> + >> + sagaw = (vtd_cap_reg & VTD_CAP_SAGAW_MASK) >> >> VTD_CAP_SAGAW_SHIFT; > > Base on previous test, sagaw is not the MAX VA address > > Below should be the correct cap decode from kernel driver include/linux/intel-iommu.h > #define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) Oh, you are right, I should check MGAW and not SAGAW. I checked VT-d emulation in QEMU and it indeed sets 39 bits for MGAW (0x38 value) so it will work. I'll post the patch in the coming hours. Thanks a lot for having run the test and the suggestion. 
Maxime > Regards > Qi > >> + >> + switch (sagaw) { >> + case 2: >> + guest_addr_width = 39; >> + break; >> + case 4: >> + guest_addr_width = 48; >> + break; >> + case 6: >> + guest_addr_width = 56; >> + break; >> + default: >> + RTE_LOG(ERR, EAL, "Unkwown Intel IOMMU SAGAW value (%lx)\n", >> + sagaw); >> + break; >> + } >> + >> + if (guest_addr_width < X86_VA_WIDTH) >> + return false; >> + >> + return true; >> +} >> +#elif defined(RTE_ARCH_PPC_64) >> +static bool >> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { >> + return false; >> +} >> +#else >> +static bool >> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { >> + return true; >> +} >> +#endif >> + >> +/* >> + * All devices IOMMUs support VA as IOVA */ static bool >> +pci_devices_iommu_support_va(void) >> +{ >> + struct rte_pci_device *dev = NULL; >> + struct rte_pci_driver *drv = NULL; >> + >> + FOREACH_DRIVER_ON_PCIBUS(drv) { >> + FOREACH_DEVICE_ON_PCIBUS(dev) { >> + if (!rte_pci_match(drv, dev)) >> + continue; >> + if (!pci_one_device_iommu_support_va(dev)) >> + return false; >> + } >> + } >> + return true; >> +} >> + >> /* >> * Get iommu class of PCI devices on the bus. >> */ >> @@ -557,12 +651,7 @@ rte_pci_get_iommu_class(void) >> bool is_vfio_noiommu_enabled = true; >> bool has_iova_va; >> bool is_bound_uio; >> - bool spapr_iommu = >> -#if defined(RTE_ARCH_PPC_64) >> - true; >> -#else >> - false; >> -#endif >> + bool iommu_no_va; >> >> is_bound = pci_one_device_is_bound(); >> if (!is_bound) >> @@ -570,13 +659,14 @@ rte_pci_get_iommu_class(void) >> >> has_iova_va = pci_one_device_has_iova_va(); >> is_bound_uio = pci_one_device_bound_uio(); >> + iommu_no_va = !pci_devices_iommu_support_va(); >> #ifdef VFIO_PRESENT >> is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ? 
>> true : false; >> #endif >> >> if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled && >> - !spapr_iommu) >> + !iommu_no_va) >> return RTE_IOVA_VA; >> >> if (has_iova_va) { >> @@ -585,8 +675,8 @@ rte_pci_get_iommu_class(void) >> RTE_LOG(WARNING, EAL, "vfio-noiommu mode >> configured\n"); >> if (is_bound_uio) >> RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); >> - if (spapr_iommu) >> - RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support >> IOVA as VA\n"); >> + if (iommu_no_va) >> + RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as >> VA\n"); >> } >> >> return RTE_IOVA_PA; >> -- >> 2.14.3 >