DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
@ 2018-08-05 18:41 Drocula
  2018-08-09 10:49 ` Burakov, Anatoly
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Drocula @ 2018-08-05 18:41 UTC (permalink / raw)
  To: maxime.coquelin; +Cc: dev, Drocula

Kernel version 4.14 was released with support for 5-level paging.
When PML5 is enabled, user-space virtual addresses use up to 56 bits.
See the kernel's Documentation/x86/x86_64/mm.txt.

Signed-off-by: Drocula <quzeyao@gmail.com>
---
 drivers/bus/pci/linux/pci.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 004600f..8913d6d 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -4,6 +4,7 @@
 
 #include <string.h>
 #include <dirent.h>
+#include <sys/mman.h>
 
 #include <rte_log.h>
 #include <rte_bus.h>
@@ -553,12 +554,34 @@
 }
 
 #if defined(RTE_ARCH_X86)
+/*
+ * Try to detect whether the system uses 5-level page table.
+ */
+static bool
+system_uses_PML5(void)
+{
+	void *page_4k, *mask = (void *)0xf0000000000000;
+	page_4k = mmap(mask, 4096, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	if (page_4k == (void *) -1)
+		return false;
+	munmap(page_4k, 4096);
+
+	if ((unsigned long)page_4k & (unsigned long)mask)
+		return true;
+	return false;
+}
+
 static bool
 pci_one_device_iommu_support_va(struct rte_pci_device *dev)
 {
 #define VTD_CAP_MGAW_SHIFT	16
 #define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)
-#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
+/*  From Documentation/x86/x86_64/mm.txt */
+#define X86_VA_WIDTH_PML4 47
+#define X86_VA_WIDTH_PML5 56
+
 	struct rte_pci_addr *addr = &dev->addr;
 	char filename[PATH_MAX];
 	FILE *fp;
@@ -589,7 +612,7 @@
 	fclose(fp);
 
 	mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
-	if (mgaw < X86_VA_WIDTH)
+	if (mgaw < (system_uses_PML5() ? X86_VA_WIDTH_PML5 : X86_VA_WIDTH_PML4))
 		return false;
 
 	return true;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-05 18:41 [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width Drocula
@ 2018-08-09 10:49 ` Burakov, Anatoly
  2018-08-10  7:51   ` Drocula
  2018-08-09 17:03 ` Stephen Hemminger
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Burakov, Anatoly @ 2018-08-09 10:49 UTC (permalink / raw)
  To: Drocula, maxime.coquelin; +Cc: dev

On 05-Aug-18 7:41 PM, Drocula wrote:
> The kernel version 4.14 released with the support of 5-level paging.
> When PML5 enabled, user-space virtual addresses uses up to 56 bits.
> see kernel's Documentation/x86/x86_64/mm.txt.
> 
> Signed-off-by: Drocula <quzeyao@gmail.com>
> ---
>   drivers/bus/pci/linux/pci.c | 27 +++++++++++++++++++++++++--
>   1 file changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index 004600f..8913d6d 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -4,6 +4,7 @@
>   
>   #include <string.h>
>   #include <dirent.h>
> +#include <sys/mman.h>
>   
>   #include <rte_log.h>
>   #include <rte_bus.h>
> @@ -553,12 +554,34 @@
>   }
>   
>   #if defined(RTE_ARCH_X86)
> +/*
> + * Try to detect whether the system uses 5-level page table.
> + */
> +static bool
> +system_uses_PML5(void)
> +{
> +	void *page_4k, *mask = (void *)0xf0000000000000;
> +	page_4k = mmap(mask, 4096, PROT_READ | PROT_WRITE,
> +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +
> +	if (page_4k == (void *) -1)
> +		return false;

Shouldn't this be MAP_FAILED?

> +	munmap(page_4k, 4096);
> +
> +	if ((unsigned long)page_4k & (unsigned long)mask)
> +		return true;
> +	return false;
> +}
> +
>   static bool
>   pci_one_device_iommu_support_va(struct rte_pci_device *dev)
>   {
>   #define VTD_CAP_MGAW_SHIFT	16
>   #define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)
> -#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
> +/*  From Documentation/x86/x86_64/mm.txt */
> +#define X86_VA_WIDTH_PML4 47
> +#define X86_VA_WIDTH_PML5 56
> +
>   	struct rte_pci_addr *addr = &dev->addr;
>   	char filename[PATH_MAX];
>   	FILE *fp;
> @@ -589,7 +612,7 @@
>   	fclose(fp);
>   
>   	mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
> -	if (mgaw < X86_VA_WIDTH)
> +	if (mgaw < (system_uses_PML5() ? X86_VA_WIDTH_PML5 : X86_VA_WIDTH_PML4))

This is perhaps nitpicking and a matter of personal preference, but I 
think storing this in a variable would be more readable than using a 
ternary operator inside an if statement.

>   		return false;
>   
>   	return true;
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-05 18:41 [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width Drocula
  2018-08-09 10:49 ` Burakov, Anatoly
@ 2018-08-09 17:03 ` Stephen Hemminger
  2018-08-10  8:35   ` Drocula
  2018-08-13 12:57 ` [dpdk-dev] [PATCH v2] " Drocula
  2023-06-09 16:31 ` [PATCH] " Stephen Hemminger
  3 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2018-08-09 17:03 UTC (permalink / raw)
  To: Drocula; +Cc: maxime.coquelin, dev

Thanks for the patch, there are some minor style/cleanups that
could be done.
 
>  #if defined(RTE_ARCH_X86)

Isn't this going to apply to 64 bit only?

> +/*
> + * Try to detect whether the system uses 5-level page table.
> + */
> +static bool
> +system_uses_PML5(void)
> +{
> +	void *page_4k, *mask = (void *)0xf0000000000000;

Magic constants expressed like this seem wrong. Why not use
a shift to make it obvious?

Also, you are assuming a particular layout of memory on
Linux which might be problematic. Plus if there is already
some memory in use there, it won't work.

> +	page_4k = mmap(mask, 4096, PROT_READ | PROT_WRITE,
> +		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

Since you are probing maybe MAP_FIXED is what you want.

> +
> +	if (page_4k == (void *) -1)
> +		return false;
Use MAP_FAILED here.

> +	munmap(page_4k, 4096);
> +
> +	if ((unsigned long)page_4k & (unsigned long)mask)
> +		return true;
> +	return false;

Wouldn't this work the same for what you expect?
	return page_4k == mask;

I.e you expect kernel to put page where you want.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-09 10:49 ` Burakov, Anatoly
@ 2018-08-10  7:51   ` Drocula
  0 siblings, 0 replies; 9+ messages in thread
From: Drocula @ 2018-08-10  7:51 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: maxime.coquelin, dev

Thanks, will refine in v2.

On Thu, Aug 9, 2018, 18:49 Burakov, Anatoly <anatoly.burakov@intel.com>
wrote:

> On 05-Aug-18 7:41 PM, Drocula wrote:
> > The kernel version 4.14 released with the support of 5-level paging.
> > When PML5 enabled, user-space virtual addresses uses up to 56 bits.
> > see kernel's Documentation/x86/x86_64/mm.txt.
> >
> > Signed-off-by: Drocula <quzeyao@gmail.com>
> > ---
> >   drivers/bus/pci/linux/pci.c | 27 +++++++++++++++++++++++++--
> >   1 file changed, 25 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> > index 004600f..8913d6d 100644
> > --- a/drivers/bus/pci/linux/pci.c
> > +++ b/drivers/bus/pci/linux/pci.c
> > @@ -4,6 +4,7 @@
> >
> >   #include <string.h>
> >   #include <dirent.h>
> > +#include <sys/mman.h>
> >
> >   #include <rte_log.h>
> >   #include <rte_bus.h>
> > @@ -553,12 +554,34 @@
> >   }
> >
> >   #if defined(RTE_ARCH_X86)
> > +/*
> > + * Try to detect whether the system uses 5-level page table.
> > + */
> > +static bool
> > +system_uses_PML5(void)
> > +{
> > +     void *page_4k, *mask = (void *)0xf0000000000000;
> > +     page_4k = mmap(mask, 4096, PROT_READ | PROT_WRITE,
> > +             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > +
> > +     if (page_4k == (void *) -1)
> > +             return false;
>
> Shouldn't this be MAP_FAILED?
>
> > +     munmap(page_4k, 4096);
> > +
> > +     if ((unsigned long)page_4k & (unsigned long)mask)
> > +             return true;
> > +     return false;
> > +}
> > +
> >   static bool
> >   pci_one_device_iommu_support_va(struct rte_pci_device *dev)
> >   {
> >   #define VTD_CAP_MGAW_SHIFT  16
> >   #define VTD_CAP_MGAW_MASK   (0x3fULL << VTD_CAP_MGAW_SHIFT)
> > -#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
> > +/*  From Documentation/x86/x86_64/mm.txt */
> > +#define X86_VA_WIDTH_PML4 47
> > +#define X86_VA_WIDTH_PML5 56
> > +
> >       struct rte_pci_addr *addr = &dev->addr;
> >       char filename[PATH_MAX];
> >       FILE *fp;
> > @@ -589,7 +612,7 @@
> >       fclose(fp);
> >
> >       mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) +
> 1;
> > -     if (mgaw < X86_VA_WIDTH)
> > +     if (mgaw < (system_uses_PML5() ? X86_VA_WIDTH_PML5 :
> X86_VA_WIDTH_PML4))
>
> This is perhaps nitpicking and a question of personal preferences, but i
> think storing this in a var would be more readable than doing ternary
> operator inside of an if statement.
>
> >               return false;
> >
> >       return true;
> >
>
>
> --
> Thanks,
> Anatoly
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-09 17:03 ` Stephen Hemminger
@ 2018-08-10  8:35   ` Drocula
  2018-08-10  9:18     ` Burakov, Anatoly
  0 siblings, 1 reply; 9+ messages in thread
From: Drocula @ 2018-08-10  8:35 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: maxime.coquelin, dev

First, thanks for your suggestions.

When using the MAP_FIXED flag, mmap will return MAP_FAILED if
0xf0000000000000 is not available.

In this case, I want mmap to return an address near 0xf0000000000000.

I will submit v2.

On Fri, Aug 10, 2018, 01:03 Stephen Hemminger <stephen@networkplumber.org>
wrote:

> Thanks for the patch, there are some minor style/cleanups that
> could be done.
>
> >  #if defined(RTE_ARCH_X86)
>
> Isn't this going to apply to 64 bit only?
>
> > +/*
> > + * Try to detect whether the system uses 5-level page table.
> > + */
> > +static bool
> > +system_uses_PML5(void)
> > +{
> > +     void *page_4k, *mask = (void *)0xf0000000000000;
>
> Magic constants expressed like this seem wrong. Why not use
> shift to make it obvious.
>
> Also, you are assuming a particular layout of memory on
> Linux which might be problematic. Plus if there is already
> some memory in use there, it won't work.
>
> > +     page_4k = mmap(mask, 4096, PROT_READ | PROT_WRITE,
> > +             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>
> Since you are probing maybe MAP_FIXED is what you want.
>
> > +
> > +     if (page_4k == (void *) -1)
> > +             return false;
> Use MMAP_FAILED here.
>
> > +     munmap(page_4k, 4096);
> > +
> > +     if ((unsigned long)page_4k & (unsigned long)mask)
> > +             return true;
> > +     return false;
>
> Wouldn't this work the same for what you expect?
>         return page_4k == mask;
>
> I.e you expect kernel to put page where you want.
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-10  8:35   ` Drocula
@ 2018-08-10  9:18     ` Burakov, Anatoly
  0 siblings, 0 replies; 9+ messages in thread
From: Burakov, Anatoly @ 2018-08-10  9:18 UTC (permalink / raw)
  To: Drocula, Stephen Hemminger; +Cc: maxime.coquelin, dev

On 10-Aug-18 9:35 AM, Drocula wrote:
> First, thanks for your suggestions.
> 
> When using the MAP_FIXED flag, mmap will return an MMAP_FAILED if
> 0xf0000000000000 is not available.
> 
> In this case, I want mmap to return an address near 0xf0000000000000.
> 
> I will submit v2.

How can we be sure there's nothing mapped at that address? I think the 
original code was correct - try mapping around that address, and see if 
we get *something* close to it with the right bits set. MAP_FIXED seems 
dangerous to use without knowing that there's nothing there. Recent 
kernels have added a safer version of MAP_FIXED, but obviously it won't 
work on the majority of kernel versions we support.

> 
> On Fri, Aug 10, 2018, 01:03 Stephen Hemminger <stephen@networkplumber.org>
> wrote:
> 
>> Thanks for the patch, there are some minor style/cleanups that
>> could be done.
>>
>>>   #if defined(RTE_ARCH_X86)
>>
>> Isn't this going to apply to 64 bit only?
>>
>>> +/*
>>> + * Try to detect whether the system uses 5-level page table.
>>> + */
>>> +static bool
>>> +system_uses_PML5(void)
>>> +{
>>> +     void *page_4k, *mask = (void *)0xf0000000000000;
>>
>> Magic constants expressed like this seem wrong. Why not use
>> shift to make it obvious.
>>
>> Also, you are assuming a particular layout of memory on
>> Linux which might be problematic. Plus if there is already
>> some memory in use there, it won't work.
>>
>>> +     page_4k = mmap(mask, 4096, PROT_READ | PROT_WRITE,
>>> +             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>>
>> Since you are probing maybe MAP_FIXED is what you want.
>>
>>> +
>>> +     if (page_4k == (void *) -1)
>>> +             return false;
>> Use MMAP_FAILED here.
>>
>>> +     munmap(page_4k, 4096);
>>> +
>>> +     if ((unsigned long)page_4k & (unsigned long)mask)
>>> +             return true;
>>> +     return false;
>>
>> Wouldn't this work the same for what you expect?
>>          return page_4k == mask;
>>
>> I.e you expect kernel to put page where you want.
>>
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH v2] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-05 18:41 [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width Drocula
  2018-08-09 10:49 ` Burakov, Anatoly
  2018-08-09 17:03 ` Stephen Hemminger
@ 2018-08-13 12:57 ` Drocula
  2018-10-28 18:22   ` Thomas Monjalon
  2023-06-09 16:31 ` [PATCH] " Stephen Hemminger
  3 siblings, 1 reply; 9+ messages in thread
From: Drocula @ 2018-08-13 12:57 UTC (permalink / raw)
  Cc: dev, anatoly.burakov, stephen, Drocula

Kernel version 4.14 was released with support for 5-level paging.
When PML5 is enabled, user-space virtual addresses use up to 56 bits.
See the kernel's Documentation/x86/x86_64/mm.txt.

Signed-off-by: ZY Qiu <quzeyao@gmail.com>
---
 drivers/bus/pci/linux/pci.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 04648ac..acc19df 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -4,6 +4,7 @@
 
 #include <string.h>
 #include <dirent.h>
+#include <sys/mman.h>
 
 #include <rte_log.h>
 #include <rte_bus.h>
@@ -552,16 +553,39 @@
 }
 
 #if defined(RTE_ARCH_X86)
+/*
+ * Try to detect whether the system uses 5-level page table.
+ */
+static bool
+system_uses_PML5(void)
+{
+#define X86_56_BIT_VA (0xfULL << 52)
+	void *page_4k;
+	page_4k = mmap((void *)X86_56_BIT_VA, 4096, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	if (page_4k == MAP_FAILED)
+		return false;
+	munmap(page_4k, 4096);
+
+	if ((unsigned long long)page_4k & X86_56_BIT_VA)
+		return true;
+	return false;
+}
+
 static bool
 pci_one_device_iommu_support_va(struct rte_pci_device *dev)
 {
 #define VTD_CAP_MGAW_SHIFT	16
 #define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)
-#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
+/*  From Documentation/x86/x86_64/mm.txt */
+#define X86_VA_WIDTH_PML4 47
+#define X86_VA_WIDTH_PML5 56
+
 	struct rte_pci_addr *addr = &dev->addr;
 	char filename[PATH_MAX];
 	FILE *fp;
-	uint64_t mgaw, vtd_cap_reg = 0;
+	uint64_t mgaw, vtd_cap_reg = 0, va_width = X86_VA_WIDTH_PML4;
 
 	snprintf(filename, sizeof(filename),
 		 "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
@@ -587,8 +611,11 @@
 
 	fclose(fp);
 
+	if (system_uses_PML5())
+		va_width = X86_VA_WIDTH_PML5;
+
 	mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
-	if (mgaw < X86_VA_WIDTH)
+	if (mgaw < va_width)
 		return false;
 
 	return true;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-13 12:57 ` [dpdk-dev] [PATCH v2] " Drocula
@ 2018-10-28 18:22   ` Thomas Monjalon
  0 siblings, 0 replies; 9+ messages in thread
From: Thomas Monjalon @ 2018-10-28 18:22 UTC (permalink / raw)
  To: Drocula, anatoly.burakov, stephen, alejandro.lucero; +Cc: dev

Any review please?

It may require some change after Alejandro patches.

13/08/2018 14:57, Drocula:
> The kernel version 4.14 released with the support of 5-level paging.
> When PML5 enabled, user-space virtual addresses uses up to 56 bits.
> see kernel's Documentation/x86/x86_64/mm.txt.
> 
> Signed-off-by: ZY Qiu <quzeyao@gmail.com>

You used a different name in another patch.
Which one should be used?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width
  2018-08-05 18:41 [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width Drocula
                   ` (2 preceding siblings ...)
  2018-08-13 12:57 ` [dpdk-dev] [PATCH v2] " Drocula
@ 2023-06-09 16:31 ` Stephen Hemminger
  3 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2023-06-09 16:31 UTC (permalink / raw)
  To: quzeyao; +Cc: dev, maxime.coquelin

This patch is no longer relevant since the current DPDK code
no longer depends on VA width.  It should be rejected.


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-06-09 16:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-05 18:41 [dpdk-dev] [PATCH] bus/pci: check if 5-level paging is enabled when testing IOMMU address width Drocula
2018-08-09 10:49 ` Burakov, Anatoly
2018-08-10  7:51   ` Drocula
2018-08-09 17:03 ` Stephen Hemminger
2018-08-10  8:35   ` Drocula
2018-08-10  9:18     ` Burakov, Anatoly
2018-08-13 12:57 ` [dpdk-dev] [PATCH v2] " Drocula
2018-10-28 18:22   ` Thomas Monjalon
2023-06-09 16:31 ` [PATCH] " Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).