* [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
@ 2018-01-12 10:22 Maxime Coquelin
2018-01-12 11:10 ` Burakov, Anatoly
2018-01-20 15:30 ` [dpdk-dev] [dpdk-stable] " Thomas Monjalon
0 siblings, 2 replies; 5+ messages in thread
From: Maxime Coquelin @ 2018-01-12 10:22 UTC (permalink / raw)
To: dev, stable, jianfeng.tan, qi.z.zhang, stephen, santosh.shukla,
anatoly.burakov, thomas
Cc: peterx, Maxime Coquelin
Intel VT-d supports different address widths for the IOVAs, from
39 bits to 56 bits.
While recent processors support at least 48 bits, VT-d emulation
currently only supports 39 bits. It makes DMA mapping to fail in this
case when using VA as IOVA mode, as user-space virtual addresses uses
up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
This patch parses VT-d CAP register value available in sysfs, and
forbid VA as IOVA mode if the GAW is 39 bits or unknown.
Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
Cc: stable@dpdk.org
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
Changes in v3:
==============
- Rely on MGAW bitfiled instead of SAGAW (Qi)
Changes in v2:
==============
- Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
- Don't inline introduced functions (Stephen)
drivers/bus/pci/linux/pci.c | 90 ++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 81 insertions(+), 9 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 25f907e04..a89e8353d 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -547,6 +547,82 @@ pci_one_device_has_iova_va(void)
return 0;
}
+#if defined(RTE_ARCH_X86)
+static bool
+pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+{
+#define VTD_CAP_MGAW_SHIFT 16
+#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
+#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
+ struct rte_pci_addr *addr = &dev->addr;
+ char filename[PATH_MAX];
+ FILE *fp;
+ uint64_t mgaw, vtd_cap_reg = 0;
+
+ snprintf(filename, sizeof(filename),
+ "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
+ rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+ addr->function);
+ if (access(filename, F_OK) == -1) {
+ /* We don't have an Intel IOMMU, assume VA supported*/
+ return true;
+ }
+
+ /* We have an intel IOMMU */
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
+ return false;
+ }
+
+ if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
+ RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
+ fclose(fp);
+ return false;
+ }
+
+ fclose(fp);
+
+ mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
+ if (mgaw < X86_VA_WIDTH)
+ return false;
+
+ return true;
+}
+#elif defined(RTE_ARCH_PPC_64)
+static bool
+pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+{
+ return false;
+}
+#else
+static bool
+pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+{
+ return true;
+}
+#endif
+
+/*
+ * All devices IOMMUs support VA as IOVA
+ */
+static bool
+pci_devices_iommu_support_va(void)
+{
+ struct rte_pci_device *dev = NULL;
+ struct rte_pci_driver *drv = NULL;
+
+ FOREACH_DRIVER_ON_PCIBUS(drv) {
+ FOREACH_DEVICE_ON_PCIBUS(dev) {
+ if (!rte_pci_match(drv, dev))
+ continue;
+ if (!pci_one_device_iommu_support_va(dev))
+ return false;
+ }
+ }
+ return true;
+}
+
/*
* Get iommu class of PCI devices on the bus.
*/
@@ -557,12 +633,7 @@ rte_pci_get_iommu_class(void)
bool is_vfio_noiommu_enabled = true;
bool has_iova_va;
bool is_bound_uio;
- bool spapr_iommu =
-#if defined(RTE_ARCH_PPC_64)
- true;
-#else
- false;
-#endif
+ bool iommu_no_va;
is_bound = pci_one_device_is_bound();
if (!is_bound)
@@ -570,13 +641,14 @@ rte_pci_get_iommu_class(void)
has_iova_va = pci_one_device_has_iova_va();
is_bound_uio = pci_one_device_bound_uio();
+ iommu_no_va = !pci_devices_iommu_support_va();
#ifdef VFIO_PRESENT
is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
true : false;
#endif
if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
- !spapr_iommu)
+ !iommu_no_va)
return RTE_IOVA_VA;
if (has_iova_va) {
@@ -585,8 +657,8 @@ rte_pci_get_iommu_class(void)
RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
if (is_bound_uio)
RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
- if (spapr_iommu)
- RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support IOVA as VA\n");
+ if (iommu_no_va)
+ RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n");
}
return RTE_IOVA_PA;
--
2.14.3
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-12 10:22 [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
@ 2018-01-12 11:10 ` Burakov, Anatoly
2018-01-12 13:18 ` Maxime Coquelin
2018-01-20 15:30 ` [dpdk-dev] [dpdk-stable] " Thomas Monjalon
1 sibling, 1 reply; 5+ messages in thread
From: Burakov, Anatoly @ 2018-01-12 11:10 UTC (permalink / raw)
To: Maxime Coquelin, dev, stable, jianfeng.tan, qi.z.zhang, stephen,
santosh.shukla, thomas
Cc: peterx
<snip>
> +#if defined(RTE_ARCH_X86)
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev)
> +{
> +#define VTD_CAP_MGAW_SHIFT 16
> +#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
> + struct rte_pci_addr *addr = &dev->addr;
> + char filename[PATH_MAX];
> + FILE *fp;
> + uint64_t mgaw, vtd_cap_reg = 0;
> +
> + snprintf(filename, sizeof(filename),
> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
> + addr->function);
> + if (access(filename, F_OK) == -1) {
> + /* We don't have an Intel IOMMU, assume VA supported*/
> + return true;
> + }
> +
> + /* We have an intel IOMMU */
> + fp = fopen(filename, "r");
> + if (fp == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
> + return false;
> + }
> +
> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
> + fclose(fp);
> + return false;
> + }
> +
> + fclose(fp);
Hi Maxime,
You probably want to use eal_parse_sysfs_value() for this.
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-12 11:10 ` Burakov, Anatoly
@ 2018-01-12 13:18 ` Maxime Coquelin
2018-01-12 22:46 ` Chas Williams
0 siblings, 1 reply; 5+ messages in thread
From: Maxime Coquelin @ 2018-01-12 13:18 UTC (permalink / raw)
To: Burakov, Anatoly, dev, stable, jianfeng.tan, qi.z.zhang, stephen,
santosh.shukla, thomas
Cc: peterx
Hi Anatoly,
On 01/12/2018 12:10 PM, Burakov, Anatoly wrote:
> <snip>
>
>> +#if defined(RTE_ARCH_X86)
>> +static bool
>> +pci_one_device_iommu_support_va(struct rte_pci_device *dev)
>> +{
>> +#define VTD_CAP_MGAW_SHIFT 16
>> +#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
>> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
>> + struct rte_pci_addr *addr = &dev->addr;
>> + char filename[PATH_MAX];
>> + FILE *fp;
>> + uint64_t mgaw, vtd_cap_reg = 0;
>> +
>> + snprintf(filename, sizeof(filename),
>> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
>> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
>> + addr->function);
>> + if (access(filename, F_OK) == -1) {
>> + /* We don't have an Intel IOMMU, assume VA supported*/
>> + return true;
>> + }
>> +
>> + /* We have an intel IOMMU */
>> + fp = fopen(filename, "r");
>> + if (fp == NULL) {
>> + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
>> + return false;
>> + }
>> +
>> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
>> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
>> + fclose(fp);
>> + return false;
>> + }
>> +
>> + fclose(fp);
>
> Hi Maxime,
>
> You probably want to use eal_parse_sysfs_value() for this.
I initially planned to use it, but the sysfs value is hexadecimal, but
not prefixed with "0x". For example:
# cat /sys/devices/pci0000\:00/0000\:00\:02.0/iommu/intel-iommu/cap
1c0000c40660462
So strtoul() assumes the value is decimal in this case, as explained in
its man page:
"
DESCRIPTION
<snip\>
If base is zero or 16, the string may then include a "0x" prefix, and
the number will be read in base 16; otherwise, a zero base is taken as
10 (decimal)
"
Thanks,
Maxime
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-12 13:18 ` Maxime Coquelin
@ 2018-01-12 22:46 ` Chas Williams
0 siblings, 0 replies; 5+ messages in thread
From: Chas Williams @ 2018-01-12 22:46 UTC (permalink / raw)
To: Maxime Coquelin
Cc: Burakov, Anatoly, dev, stable, Jianfeng Tan, Zhang, Qi Z,
Stephen Hemminger, santosh.shukla, Thomas Monjalon, peterx
Tested-by: Chas Williams <chas@att.com>
On Fri, Jan 12, 2018 at 8:18 AM, Maxime Coquelin <maxime.coquelin@redhat.com
> wrote:
> Hi Anatoly,
>
>
> On 01/12/2018 12:10 PM, Burakov, Anatoly wrote:
>
>> <snip>
>>
>> +#if defined(RTE_ARCH_X86)
>>> +static bool
>>> +pci_one_device_iommu_support_va(struct rte_pci_device *dev)
>>> +{
>>> +#define VTD_CAP_MGAW_SHIFT 16
>>> +#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
>>> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
>>> + struct rte_pci_addr *addr = &dev->addr;
>>> + char filename[PATH_MAX];
>>> + FILE *fp;
>>> + uint64_t mgaw, vtd_cap_reg = 0;
>>> +
>>> + snprintf(filename, sizeof(filename),
>>> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
>>> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
>>> + addr->function);
>>> + if (access(filename, F_OK) == -1) {
>>> + /* We don't have an Intel IOMMU, assume VA supported*/
>>> + return true;
>>> + }
>>> +
>>> + /* We have an intel IOMMU */
>>> + fp = fopen(filename, "r");
>>> + if (fp == NULL) {
>>> + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
>>> + return false;
>>> + }
>>> +
>>> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
>>> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
>>> + fclose(fp);
>>> + return false;
>>> + }
>>> +
>>> + fclose(fp);
>>>
>>
>> Hi Maxime,
>>
>> You probably want to use eal_parse_sysfs_value() for this.
>>
>
> I initially planned to use it, but the sysfs value is hexadecimal, but
> not prefixed with "0x". For example:
>
> # cat /sys/devices/pci0000\:00/0000\:00\:02.0/iommu/intel-iommu/cap
> 1c0000c40660462
>
> So strtoul() assumes the value is decimal in this case, as explained in
> its man page:
>
> "
> DESCRIPTION
> <snip\>
> If base is zero or 16, the string may then include a "0x" prefix, and
> the number will be read in base 16; otherwise, a zero base is taken as
> 10 (decimal)
> "
>
> Thanks,
> Maxime
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [dpdk-stable] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-12 10:22 [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
2018-01-12 11:10 ` Burakov, Anatoly
@ 2018-01-20 15:30 ` Thomas Monjalon
1 sibling, 0 replies; 5+ messages in thread
From: Thomas Monjalon @ 2018-01-20 15:30 UTC (permalink / raw)
To: Maxime Coquelin
Cc: stable, dev, jianfeng.tan, qi.z.zhang, stephen, santosh.shukla,
anatoly.burakov, peterx
12/01/2018 11:22, Maxime Coquelin:
> Intel VT-d supports different address widths for the IOVAs, from
> 39 bits to 56 bits.
>
> While recent processors support at least 48 bits, VT-d emulation
> currently only supports 39 bits. It makes DMA mapping to fail in this
> case when using VA as IOVA mode, as user-space virtual addresses uses
> up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
>
> This patch parses VT-d CAP register value available in sysfs, and
> forbid VA as IOVA mode if the GAW is 39 bits or unknown.
>
> Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
>
> Cc: stable@dpdk.org
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
[...]
> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
Compilation error on 32-bit. Fix:
- if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
+ if (fscanf(fp, "%" PRIx64, &vtd_cap_reg) != 1) {
[...]
> +#elif defined(RTE_ARCH_PPC_64)
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev)
> +{
> + return false;
> +}
> +#else
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev)
> +{
> + return true;
> +}
> +#endif
Compilation error on non-x86. Fix:
#elif defined(RTE_ARCH_PPC_64)
static bool
-pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
{
return false;
}
#else
static bool
-pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
{
return true;
}
Applied with above fixes, thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2018-01-20 15:30 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-12 10:22 [dpdk-dev] [PATCH v3] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
2018-01-12 11:10 ` Burakov, Anatoly
2018-01-12 13:18 ` Maxime Coquelin
2018-01-12 22:46 ` Chas Williams
2018-01-20 15:30 ` [dpdk-dev] [dpdk-stable] " Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).