* [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
@ 2018-01-09 13:18 Maxime Coquelin
2018-01-11 20:19 ` Maxime Coquelin
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Maxime Coquelin @ 2018-01-09 13:18 UTC (permalink / raw)
To: dev, stable, jianfeng.tan, santosh.shukla, anatoly.burakov,
thomas, stephen
Cc: peterx, Maxime Coquelin
Intel VT-d supports different address widths for the IOVAs, from
39 bits to 56 bits.
While recent processors support at least 48 bits, VT-d emulation
currently only supports 39 bits. It makes DMA mapping to fail in this
case when using VA as IOVA mode, as user-space virtual addresses uses
up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
This patch parses VT-d CAP register value available in sysfs, and
forbid VA as IOVA mode if the GAW is 39 bits or unknown.
Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
Cc: stable@dpdk.org
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
Changes in v2:
==============
- Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
- Don't inline introduced functions (Stephen)
drivers/bus/pci/linux/pci.c | 108 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 99 insertions(+), 9 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 25f907e04..0a43c4b89 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void)
return 0;
}
+#if defined(RTE_ARCH_X86)
+static bool
+pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+{
+#define VTD_CAP_SAGAW_SHIFT 8
+#define VTD_CAP_SAGAW_MASK (0x1fULL << VTD_CAP_SAGAW_SHIFT)
+#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
+ struct rte_pci_addr *addr = &dev->addr;
+ char filename[PATH_MAX];
+ FILE *fp;
+ uint64_t sagaw, vtd_cap_reg = 0;
+ int guest_addr_width = 0;
+
+ snprintf(filename, sizeof(filename),
+ "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
+ rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+ addr->function);
+ if (access(filename, F_OK) == -1) {
+ /* We don't have an Intel IOMMU, assume VA supported*/
+ return true;
+ }
+
+ /* We have an intel IOMMU */
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
+ return false;
+ }
+
+ if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
+ RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
+ fclose(fp);
+ return false;
+ }
+
+ fclose(fp);
+
+ sagaw = (vtd_cap_reg & VTD_CAP_SAGAW_MASK) >> VTD_CAP_SAGAW_SHIFT;
+
+ switch (sagaw) {
+ case 2:
+ guest_addr_width = 39;
+ break;
+ case 4:
+ guest_addr_width = 48;
+ break;
+ case 6:
+ guest_addr_width = 56;
+ break;
+ default:
+ RTE_LOG(ERR, EAL, "Unkwown Intel IOMMU SAGAW value (%lx)\n",
+ sagaw);
+ break;
+ }
+
+ if (guest_addr_width < X86_VA_WIDTH)
+ return false;
+
+ return true;
+}
+#elif defined(RTE_ARCH_PPC_64)
+static bool
+pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+{
+ return false;
+}
+#else
+static bool
+pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+{
+ return true;
+}
+#endif
+
+/*
+ * All devices IOMMUs support VA as IOVA
+ */
+static bool
+pci_devices_iommu_support_va(void)
+{
+ struct rte_pci_device *dev = NULL;
+ struct rte_pci_driver *drv = NULL;
+
+ FOREACH_DRIVER_ON_PCIBUS(drv) {
+ FOREACH_DEVICE_ON_PCIBUS(dev) {
+ if (!rte_pci_match(drv, dev))
+ continue;
+ if (!pci_one_device_iommu_support_va(dev))
+ return false;
+ }
+ }
+ return true;
+}
+
/*
* Get iommu class of PCI devices on the bus.
*/
@@ -557,12 +651,7 @@ rte_pci_get_iommu_class(void)
bool is_vfio_noiommu_enabled = true;
bool has_iova_va;
bool is_bound_uio;
- bool spapr_iommu =
-#if defined(RTE_ARCH_PPC_64)
- true;
-#else
- false;
-#endif
+ bool iommu_no_va;
is_bound = pci_one_device_is_bound();
if (!is_bound)
@@ -570,13 +659,14 @@ rte_pci_get_iommu_class(void)
has_iova_va = pci_one_device_has_iova_va();
is_bound_uio = pci_one_device_bound_uio();
+ iommu_no_va = !pci_devices_iommu_support_va();
#ifdef VFIO_PRESENT
is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
true : false;
#endif
if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
- !spapr_iommu)
+ !iommu_no_va)
return RTE_IOVA_VA;
if (has_iova_va) {
@@ -585,8 +675,8 @@ rte_pci_get_iommu_class(void)
RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
if (is_bound_uio)
RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
- if (spapr_iommu)
- RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support IOVA as VA\n");
+ if (iommu_no_va)
+ RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n");
}
return RTE_IOVA_PA;
--
2.14.3
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-09 13:18 [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
@ 2018-01-11 20:19 ` Maxime Coquelin
2018-01-12 1:15 ` Tan, Jianfeng
2018-01-12 3:00 ` Tan, Jianfeng
2018-01-12 3:56 ` [dpdk-stable] [dpdk-dev] " Zhang, Qi Z
2 siblings, 1 reply; 7+ messages in thread
From: Maxime Coquelin @ 2018-01-11 20:19 UTC (permalink / raw)
To: jianfeng.tan
Cc: dev, stable, santosh.shukla, anatoly.burakov, thomas, stephen, peterx
Hi Jianfeng,
On 01/09/2018 02:18 PM, Maxime Coquelin wrote:
> Intel VT-d supports different address widths for the IOVAs, from
> 39 bits to 56 bits.
>
> While recent processors support at least 48 bits, VT-d emulation
> currently only supports 39 bits. It makes DMA mapping to fail in this
> case when using VA as IOVA mode, as user-space virtual addresses uses
> up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
>
> This patch parses VT-d CAP register value available in sysfs, and
> forbid VA as IOVA mode if the GAW is 39 bits or unknown.
>
> Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
>
> Cc: stable@dpdk.org
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>
> Changes in v2:
> ==============
> - Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
> - Don't inline introduced functions (Stephen)
>
> drivers/bus/pci/linux/pci.c | 108 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 99 insertions(+), 9 deletions(-)
>
Could you please try the patch and confirm it does not break your
--no-huge usecase?
Are you fine with the fix?
Thanks,
Maxime
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-11 20:19 ` Maxime Coquelin
@ 2018-01-12 1:15 ` Tan, Jianfeng
0 siblings, 0 replies; 7+ messages in thread
From: Tan, Jianfeng @ 2018-01-12 1:15 UTC (permalink / raw)
To: Maxime Coquelin
Cc: dev, stable, santosh.shukla, Burakov, Anatoly, thomas, stephen, peterx
Hi Maxime,
> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Friday, January 12, 2018 4:20 AM
> To: Tan, Jianfeng
> Cc: dev@dpdk.org; stable@dpdk.org; santosh.shukla@caviumnetworks.com;
> Burakov, Anatoly; thomas@monjalon.net; stephen@networkplumber.org;
> peterx@redhat.com
> Subject: Re: [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address
> width too small
>
> Hi Jianfeng,
>
> On 01/09/2018 02:18 PM, Maxime Coquelin wrote:
> > Intel VT-d supports different address widths for the IOVAs, from
> > 39 bits to 56 bits.
> >
> > While recent processors support at least 48 bits, VT-d emulation
> > currently only supports 39 bits. It makes DMA mapping to fail in this
> > case when using VA as IOVA mode, as user-space virtual addresses uses
> > up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
Yes, I also notice this issue on some of Intel platform with limited width for IOVA. But I was always suggesting to use --base-virtaddr to work around.
> >
> > This patch parses VT-d CAP register value available in sysfs, and
> > forbid VA as IOVA mode if the GAW is 39 bits or unknown.
> >
> > Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
> >
> > Cc: stable@dpdk.org
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >
> > Changes in v2:
> > ==============
> > - Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
> > - Don't inline introduced functions (Stephen)
> >
> > drivers/bus/pci/linux/pci.c | 108
> ++++++++++++++++++++++++++++++++++++++++----
> > 1 file changed, 99 insertions(+), 9 deletions(-)
> >
>
> Could you please try the patch and confirm it does not break your
> --no-huge usecase?
I've tested the --no-huge case, it still works on my xeon E5 server. I'll add my Tested-by to the original patch.
>
> Are you fine with the fix?
Forbidding such usage seems a little overkill to me actually. How about just making it as a warning like:
"IOMMU only support 39-bit IOVA address, you might want to use --base-virtaddr to restrict the range of virtual address (IOVA)"
Thanks,
Jianfeng
>
> Thanks,
> Maxime
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-09 13:18 [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
2018-01-11 20:19 ` Maxime Coquelin
@ 2018-01-12 3:00 ` Tan, Jianfeng
2018-01-12 3:28 ` Zhang, Qi Z
2018-01-12 3:56 ` [dpdk-stable] [dpdk-dev] " Zhang, Qi Z
2 siblings, 1 reply; 7+ messages in thread
From: Tan, Jianfeng @ 2018-01-12 3:00 UTC (permalink / raw)
To: 'Maxime Coquelin',
dev, stable, santosh.shukla, Burakov, Anatoly, thomas, stephen
Cc: peterx, Zhang, Qi Z
> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Tuesday, January 9, 2018 9:18 PM
> To: dev@dpdk.org; stable@dpdk.org; Tan, Jianfeng;
> santosh.shukla@caviumnetworks.com; Burakov, Anatoly;
> thomas@monjalon.net; stephen@networkplumber.org
> Cc: peterx@redhat.com; Maxime Coquelin
> Subject: [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address
> width too small
>
> Intel VT-d supports different address widths for the IOVAs, from
> 39 bits to 56 bits.
>
> While recent processors support at least 48 bits, VT-d emulation
> currently only supports 39 bits. It makes DMA mapping to fail in this
> case when using VA as IOVA mode, as user-space virtual addresses uses
> up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
>
> This patch parses VT-d CAP register value available in sysfs, and
> forbid VA as IOVA mode if the GAW is 39 bits or unknown.
>
> Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
>
> Cc: stable@dpdk.org
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
I don't have strong objection on this patch. Plus, this patch has been verified with --no-huge, and Intel low-end processors (with the help of Zhang Qi).
Tested-by: Jianfeng Tan <jianfeng.tan@intel.com>
> ---
>
> Changes in v2:
> ==============
> - Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
> - Don't inline introduced functions (Stephen)
>
> drivers/bus/pci/linux/pci.c | 108
> ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 99 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index 25f907e04..0a43c4b89 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void)
> return 0;
> }
>
> +#if defined(RTE_ARCH_X86)
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev)
> +{
> +#define VTD_CAP_SAGAW_SHIFT 8
> +#define VTD_CAP_SAGAW_MASK (0x1fULL <<
> VTD_CAP_SAGAW_SHIFT)
> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
> + struct rte_pci_addr *addr = &dev->addr;
> + char filename[PATH_MAX];
> + FILE *fp;
> + uint64_t sagaw, vtd_cap_reg = 0;
> + int guest_addr_width = 0;
> +
> + snprintf(filename, sizeof(filename),
> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr-
> >devid,
> + addr->function);
> + if (access(filename, F_OK) == -1) {
> + /* We don't have an Intel IOMMU, assume VA supported*/
A nitpick: missed a space between "supported" and "*/"
Thanks,
Jianfeng
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-12 3:00 ` Tan, Jianfeng
@ 2018-01-12 3:28 ` Zhang, Qi Z
0 siblings, 0 replies; 7+ messages in thread
From: Zhang, Qi Z @ 2018-01-12 3:28 UTC (permalink / raw)
To: Tan, Jianfeng, 'Maxime Coquelin',
dev, stable, santosh.shukla, Burakov, Anatoly, thomas, stephen
Cc: peterx
> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Friday, January 12, 2018 11:01 AM
> To: 'Maxime Coquelin' <maxime.coquelin@redhat.com>; dev@dpdk.org;
> stable@dpdk.org; santosh.shukla@caviumnetworks.com; Burakov, Anatoly
> <anatoly.burakov@intel.com>; thomas@monjalon.net;
> stephen@networkplumber.org
> Cc: peterx@redhat.com; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: RE: [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address
> width too small
>
>
>
> > -----Original Message-----
> > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > Sent: Tuesday, January 9, 2018 9:18 PM
> > To: dev@dpdk.org; stable@dpdk.org; Tan, Jianfeng;
> > santosh.shukla@caviumnetworks.com; Burakov, Anatoly;
> > thomas@monjalon.net; stephen@networkplumber.org
> > Cc: peterx@redhat.com; Maxime Coquelin
> > Subject: [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address
> > width too small
> >
> > Intel VT-d supports different address widths for the IOVAs, from
> > 39 bits to 56 bits.
> >
> > While recent processors support at least 48 bits, VT-d emulation
> > currently only supports 39 bits. It makes DMA mapping to fail in this
> > case when using VA as IOVA mode, as user-space virtual addresses uses
> > up to 47 bits (see kernel's Documentation/x86/x86_64/mm.txt).
> >
> > This patch parses VT-d CAP register value available in sysfs, and
> > forbid VA as IOVA mode if the GAW is 39 bits or unknown.
> >
> > Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
> >
> > Cc: stable@dpdk.org
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>
> I don't have strong objection on this patch. Plus, this patch has been verified
> with --no-huge, and Intel low-end processors (with the help of Zhang Qi).
Sorry, my bad, I just found it is tested with igb_uio.
re-tested it with vfio, seems VA width is no decoded correctly
I have sagaw = 4, which means 48b VA address, but the real device is 39, so IOVA is not disabled, still see that failure.
Thanks
Qi
>
> Tested-by: Jianfeng Tan <jianfeng.tan@intel.com>
>
> > ---
> >
> > Changes in v2:
> > ==============
> > - Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
> > - Don't inline introduced functions (Stephen)
> >
> > drivers/bus/pci/linux/pci.c | 108
> > ++++++++++++++++++++++++++++++++++++++++----
> > 1 file changed, 99 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> > index 25f907e04..0a43c4b89 100644
> > --- a/drivers/bus/pci/linux/pci.c
> > +++ b/drivers/bus/pci/linux/pci.c
> > @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void)
> > return 0;
> > }
> >
> > +#if defined(RTE_ARCH_X86)
> > +static bool
> > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
> > +#define VTD_CAP_SAGAW_SHIFT 8
> > +#define VTD_CAP_SAGAW_MASK (0x1fULL <<
> > VTD_CAP_SAGAW_SHIFT)
> > +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt
> */
> > + struct rte_pci_addr *addr = &dev->addr;
> > + char filename[PATH_MAX];
> > + FILE *fp;
> > + uint64_t sagaw, vtd_cap_reg = 0;
> > + int guest_addr_width = 0;
> > +
> > + snprintf(filename, sizeof(filename),
> > + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
> > + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr-
> > >devid,
> > + addr->function);
> > + if (access(filename, F_OK) == -1) {
> > + /* We don't have an Intel IOMMU, assume VA supported*/
>
> A nitpick: missed a space between "supported" and "*/"
>
> Thanks,
> Jianfeng
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [dpdk-stable] [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-09 13:18 [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
2018-01-11 20:19 ` Maxime Coquelin
2018-01-12 3:00 ` Tan, Jianfeng
@ 2018-01-12 3:56 ` Zhang, Qi Z
2018-01-12 8:21 ` Maxime Coquelin
2 siblings, 1 reply; 7+ messages in thread
From: Zhang, Qi Z @ 2018-01-12 3:56 UTC (permalink / raw)
To: Maxime Coquelin, dev, stable, Tan, Jianfeng, santosh.shukla,
Burakov, Anatoly, thomas, stephen
Cc: peterx
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Maxime Coquelin
> Sent: Tuesday, January 9, 2018 9:18 PM
> To: dev@dpdk.org; stable@dpdk.org; Tan, Jianfeng <jianfeng.tan@intel.com>;
> santosh.shukla@caviumnetworks.com; Burakov, Anatoly
> <anatoly.burakov@intel.com>; thomas@monjalon.net;
> stephen@networkplumber.org
> Cc: peterx@redhat.com; Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU
> address width too small
>
> Intel VT-d supports different address widths for the IOVAs, from
> 39 bits to 56 bits.
>
> While recent processors support at least 48 bits, VT-d emulation currently
> only supports 39 bits. It makes DMA mapping to fail in this case when using
> VA as IOVA mode, as user-space virtual addresses uses up to 47 bits (see
> kernel's Documentation/x86/x86_64/mm.txt).
>
> This patch parses VT-d CAP register value available in sysfs, and forbid VA as
> IOVA mode if the GAW is 39 bits or unknown.
>
> Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
>
> Cc: stable@dpdk.org
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>
> Changes in v2:
> ==============
> - Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
> - Don't inline introduced functions (Stephen)
>
> drivers/bus/pci/linux/pci.c | 108
> ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 99 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index
> 25f907e04..0a43c4b89 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void)
> return 0;
> }
>
> +#if defined(RTE_ARCH_X86)
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
> +#define VTD_CAP_SAGAW_SHIFT 8
> +#define VTD_CAP_SAGAW_MASK (0x1fULL <<
> VTD_CAP_SAGAW_SHIFT)
> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt
> */
> + struct rte_pci_addr *addr = &dev->addr;
> + char filename[PATH_MAX];
> + FILE *fp;
> + uint64_t sagaw, vtd_cap_reg = 0;
> + int guest_addr_width = 0;
> +
> + snprintf(filename, sizeof(filename),
> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
> + addr->function);
> + if (access(filename, F_OK) == -1) {
> + /* We don't have an Intel IOMMU, assume VA supported*/
> + return true;
> + }
> +
> + /* We have an intel IOMMU */
> + fp = fopen(filename, "r");
> + if (fp == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
> + return false;
> + }
> +
> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
> + fclose(fp);
> + return false;
> + }
> +
> + fclose(fp);
> +
> + sagaw = (vtd_cap_reg & VTD_CAP_SAGAW_MASK) >>
> VTD_CAP_SAGAW_SHIFT;
Base on previous test, sagaw is not the MAX VA address
Below should be the correct cap decode from kernel driver include/linux/intel-iommu.h
#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
Regards
Qi
> +
> + switch (sagaw) {
> + case 2:
> + guest_addr_width = 39;
> + break;
> + case 4:
> + guest_addr_width = 48;
> + break;
> + case 6:
> + guest_addr_width = 56;
> + break;
> + default:
> + RTE_LOG(ERR, EAL, "Unkwown Intel IOMMU SAGAW value (%lx)\n",
> + sagaw);
> + break;
> + }
> +
> + if (guest_addr_width < X86_VA_WIDTH)
> + return false;
> +
> + return true;
> +}
> +#elif defined(RTE_ARCH_PPC_64)
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
> + return false;
> +}
> +#else
> +static bool
> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
> + return true;
> +}
> +#endif
> +
> +/*
> + * All devices IOMMUs support VA as IOVA */ static bool
> +pci_devices_iommu_support_va(void)
> +{
> + struct rte_pci_device *dev = NULL;
> + struct rte_pci_driver *drv = NULL;
> +
> + FOREACH_DRIVER_ON_PCIBUS(drv) {
> + FOREACH_DEVICE_ON_PCIBUS(dev) {
> + if (!rte_pci_match(drv, dev))
> + continue;
> + if (!pci_one_device_iommu_support_va(dev))
> + return false;
> + }
> + }
> + return true;
> +}
> +
> /*
> * Get iommu class of PCI devices on the bus.
> */
> @@ -557,12 +651,7 @@ rte_pci_get_iommu_class(void)
> bool is_vfio_noiommu_enabled = true;
> bool has_iova_va;
> bool is_bound_uio;
> - bool spapr_iommu =
> -#if defined(RTE_ARCH_PPC_64)
> - true;
> -#else
> - false;
> -#endif
> + bool iommu_no_va;
>
> is_bound = pci_one_device_is_bound();
> if (!is_bound)
> @@ -570,13 +659,14 @@ rte_pci_get_iommu_class(void)
>
> has_iova_va = pci_one_device_has_iova_va();
> is_bound_uio = pci_one_device_bound_uio();
> + iommu_no_va = !pci_devices_iommu_support_va();
> #ifdef VFIO_PRESENT
> is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
> true : false;
> #endif
>
> if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
> - !spapr_iommu)
> + !iommu_no_va)
> return RTE_IOVA_VA;
>
> if (has_iova_va) {
> @@ -585,8 +675,8 @@ rte_pci_get_iommu_class(void)
> RTE_LOG(WARNING, EAL, "vfio-noiommu mode
> configured\n");
> if (is_bound_uio)
> RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
> - if (spapr_iommu)
> - RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support
> IOVA as VA\n");
> + if (iommu_no_va)
> + RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as
> VA\n");
> }
>
> return RTE_IOVA_PA;
> --
> 2.14.3
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [dpdk-stable] [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small
2018-01-12 3:56 ` [dpdk-stable] [dpdk-dev] " Zhang, Qi Z
@ 2018-01-12 8:21 ` Maxime Coquelin
0 siblings, 0 replies; 7+ messages in thread
From: Maxime Coquelin @ 2018-01-12 8:21 UTC (permalink / raw)
To: Zhang, Qi Z, dev, stable, Tan, Jianfeng, santosh.shukla, Burakov,
Anatoly, thomas, stephen
Cc: peterx
On 01/12/2018 04:56 AM, Zhang, Qi Z wrote:
>
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Maxime Coquelin
>> Sent: Tuesday, January 9, 2018 9:18 PM
>> To: dev@dpdk.org; stable@dpdk.org; Tan, Jianfeng <jianfeng.tan@intel.com>;
>> santosh.shukla@caviumnetworks.com; Burakov, Anatoly
>> <anatoly.burakov@intel.com>; thomas@monjalon.net;
>> stephen@networkplumber.org
>> Cc: peterx@redhat.com; Maxime Coquelin <maxime.coquelin@redhat.com>
>> Subject: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU
>> address width too small
>>
>> Intel VT-d supports different address widths for the IOVAs, from
>> 39 bits to 56 bits.
>>
>> While recent processors support at least 48 bits, VT-d emulation currently
>> only supports 39 bits. It makes DMA mapping to fail in this case when using
>> VA as IOVA mode, as user-space virtual addresses uses up to 47 bits (see
>> kernel's Documentation/x86/x86_64/mm.txt).
>>
>> This patch parses VT-d CAP register value available in sysfs, and forbid VA as
>> IOVA mode if the GAW is 39 bits or unknown.
>>
>> Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs")
>>
>> Cc: stable@dpdk.org
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>
>> Changes in v2:
>> ==============
>> - Rework pci_one_device_iommu_support_va #ifdefery (Stephen)
>> - Don't inline introduced functions (Stephen)
>>
>> drivers/bus/pci/linux/pci.c | 108
>> ++++++++++++++++++++++++++++++++++++++++----
>> 1 file changed, 99 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index
>> 25f907e04..0a43c4b89 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void)
>> return 0;
>> }
>>
>> +#if defined(RTE_ARCH_X86)
>> +static bool
>> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
>> +#define VTD_CAP_SAGAW_SHIFT 8
>> +#define VTD_CAP_SAGAW_MASK (0x1fULL <<
>> VTD_CAP_SAGAW_SHIFT)
>> +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt
>> */
>> + struct rte_pci_addr *addr = &dev->addr;
>> + char filename[PATH_MAX];
>> + FILE *fp;
>> + uint64_t sagaw, vtd_cap_reg = 0;
>> + int guest_addr_width = 0;
>> +
>> + snprintf(filename, sizeof(filename),
>> + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
>> + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
>> + addr->function);
>> + if (access(filename, F_OK) == -1) {
>> + /* We don't have an Intel IOMMU, assume VA supported*/
>> + return true;
>> + }
>> +
>> + /* We have an intel IOMMU */
>> + fp = fopen(filename, "r");
>> + if (fp == NULL) {
>> + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename);
>> + return false;
>> + }
>> +
>> + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) {
>> + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
>> + fclose(fp);
>> + return false;
>> + }
>> +
>> + fclose(fp);
>> +
>> + sagaw = (vtd_cap_reg & VTD_CAP_SAGAW_MASK) >>
>> VTD_CAP_SAGAW_SHIFT;
>
> Base on previous test, sagaw is not the MAX VA address
>
> Below should be the correct cap decode from kernel driver include/linux/intel-iommu.h
> #define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
Oh, you are right, I should check MGAW and no SAGAW.
I checked VTD-d emulation in QEMU and it indeed sets 39 bits for MGAW
(0x38 value) so it will work.
I'll post the patch in the coming hours.
Thanks a lot for having run the test and the suggestion.
Maxime
> Regards
> Qi
>
>> +
>> + switch (sagaw) {
>> + case 2:
>> + guest_addr_width = 39;
>> + break;
>> + case 4:
>> + guest_addr_width = 48;
>> + break;
>> + case 6:
>> + guest_addr_width = 56;
>> + break;
>> + default:
>> + RTE_LOG(ERR, EAL, "Unkwown Intel IOMMU SAGAW value (%lx)\n",
>> + sagaw);
>> + break;
>> + }
>> +
>> + if (guest_addr_width < X86_VA_WIDTH)
>> + return false;
>> +
>> + return true;
>> +}
>> +#elif defined(RTE_ARCH_PPC_64)
>> +static bool
>> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
>> + return false;
>> +}
>> +#else
>> +static bool
>> +pci_one_device_iommu_support_va(struct rte_pci_device *dev) {
>> + return true;
>> +}
>> +#endif
>> +
>> +/*
>> + * All devices IOMMUs support VA as IOVA */ static bool
>> +pci_devices_iommu_support_va(void)
>> +{
>> + struct rte_pci_device *dev = NULL;
>> + struct rte_pci_driver *drv = NULL;
>> +
>> + FOREACH_DRIVER_ON_PCIBUS(drv) {
>> + FOREACH_DEVICE_ON_PCIBUS(dev) {
>> + if (!rte_pci_match(drv, dev))
>> + continue;
>> + if (!pci_one_device_iommu_support_va(dev))
>> + return false;
>> + }
>> + }
>> + return true;
>> +}
>> +
>> /*
>> * Get iommu class of PCI devices on the bus.
>> */
>> @@ -557,12 +651,7 @@ rte_pci_get_iommu_class(void)
>> bool is_vfio_noiommu_enabled = true;
>> bool has_iova_va;
>> bool is_bound_uio;
>> - bool spapr_iommu =
>> -#if defined(RTE_ARCH_PPC_64)
>> - true;
>> -#else
>> - false;
>> -#endif
>> + bool iommu_no_va;
>>
>> is_bound = pci_one_device_is_bound();
>> if (!is_bound)
>> @@ -570,13 +659,14 @@ rte_pci_get_iommu_class(void)
>>
>> has_iova_va = pci_one_device_has_iova_va();
>> is_bound_uio = pci_one_device_bound_uio();
>> + iommu_no_va = !pci_devices_iommu_support_va();
>> #ifdef VFIO_PRESENT
>> is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
>> true : false;
>> #endif
>>
>> if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
>> - !spapr_iommu)
>> + !iommu_no_va)
>> return RTE_IOVA_VA;
>>
>> if (has_iova_va) {
>> @@ -585,8 +675,8 @@ rte_pci_get_iommu_class(void)
>> RTE_LOG(WARNING, EAL, "vfio-noiommu mode
>> configured\n");
>> if (is_bound_uio)
>> RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
>> - if (spapr_iommu)
>> - RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support
>> IOVA as VA\n");
>> + if (iommu_no_va)
>> + RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as
>> VA\n");
>> }
>>
>> return RTE_IOVA_PA;
>> --
>> 2.14.3
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2018-01-12 8:21 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-09 13:18 [dpdk-stable] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU address width too small Maxime Coquelin
2018-01-11 20:19 ` Maxime Coquelin
2018-01-12 1:15 ` Tan, Jianfeng
2018-01-12 3:00 ` Tan, Jianfeng
2018-01-12 3:28 ` Zhang, Qi Z
2018-01-12 3:56 ` [dpdk-stable] [dpdk-dev] " Zhang, Qi Z
2018-01-12 8:21 ` Maxime Coquelin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).