* [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses
2018-10-05 12:06 [dpdk-dev] [PATCH v2 0/6] use IOVAs check based on DMA mask Alejandro Lucero
@ 2018-10-05 12:06 ` Alejandro Lucero
2018-10-05 12:15 ` Burakov, Anatoly
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 2/6] mem: use address hint for mapping hugepages Alejandro Lucero
` (4 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:06 UTC (permalink / raw)
To: dev
A device can suffer addressing limitations. This function checks that
memsegs have IOVAs within the supported range based on the dma mask.
PMDs should use this function during initialization if the device
suffers addressing limitations, returning an error if this function
reports memsegs out of range.
Another usage is for emulated IOMMU hardware with addressing
limitations.
It is necessary to save the most restrictive dma mask for checking
memory allocated dynamically after initialization.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
doc/guides/rel_notes/release_18_11.rst | 10 ++++
lib/librte_eal/common/eal_common_memory.c | 64 +++++++++++++++++++++++
lib/librte_eal/common/include/rte_eal_memconfig.h | 3 ++
lib/librte_eal/common/include/rte_memory.h | 3 ++
lib/librte_eal/common/malloc_heap.c | 12 +++++
lib/librte_eal/linuxapp/eal/eal.c | 2 +
lib/librte_eal/rte_eal_version.map | 1 +
7 files changed, 95 insertions(+)
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 2133a5b..c806dc6 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -104,6 +104,14 @@ New Features
the specified port. The port must be stopped before the command call in order
to reconfigure queues.
+* **Added check for ensuring allocated memory addressable by devices.**
+
+ Some devices can have addressing limitations so a new function,
+ ``rte_eal_check_dma_mask``, has been added for checking allocated memory is
+ not out of the device range. Because memory can now be dynamically allocated
+ after initialization, a dma mask is kept and any newly allocated memory is
+ checked against that dma mask and rejected if out of range. If more than one
+ device has addressing limitations, the dma mask is the most restrictive one.
API Changes
-----------
@@ -156,6 +164,8 @@ ABI Changes
``rte_config`` structure on account of improving DPDK usability when
using either ``--legacy-mem`` or ``--single-file-segments`` flags.
+* eal: added ``dma_maskbits`` to ``rte_mem_config`` for keeping more restricted
+ dma mask based on devices addressing limitations.
Removed Items
-------------
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 0b69804..7555e76 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -385,6 +385,70 @@ struct virtiova {
rte_memseg_walk(dump_memseg, f);
}
+static int
+check_iova(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ uint64_t *mask = arg;
+ rte_iova_t iova;
+
+ /* higher address within segment */
+ iova = (ms->iova + ms->len) - 1;
+ if (!(iova & *mask))
+ return 0;
+
+ RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
+ ms->iova, ms->len);
+
+ RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
+ return 1;
+}
+
+#if defined(RTE_ARCH_64)
+#define MAX_DMA_MASK_BITS 63
+#else
+#define MAX_DMA_MASK_BITS 31
+#endif
+
+/* check memseg iovas are within the required range based on dma mask */
+int __rte_experimental
+rte_eal_check_dma_mask(uint8_t maskbits)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint64_t mask;
+
+ /* sanity check */
+ if (maskbits > MAX_DMA_MASK_BITS) {
+ RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
+ maskbits, MAX_DMA_MASK_BITS);
+ return -1;
+ }
+
+ /* keep the more restricted maskbit */
+ if (!mcfg->dma_maskbits || maskbits < mcfg->dma_maskbits)
+ mcfg->dma_maskbits = maskbits;
+
+ /* create dma mask */
+ mask = ~((1ULL << maskbits) - 1);
+
+ if (rte_memseg_walk(check_iova, &mask))
+ /*
+ * Dma mask precludes hugepage usage.
+ * This device can not be used and we do not need to keep
+ * the dma mask.
+ */
+ return 1;
+
+ /*
+ * we need to keep the more restricted maskbit for checking
+ * potential dynamic memory allocation in the future.
+ */
+ mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+ RTE_MIN(mcfg->dma_maskbits, maskbits);
+
+ return 0;
+}
+
/* return the number of memory channels */
unsigned rte_memory_get_nchannel(void)
{
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 62a21c2..b5dff70 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -81,6 +81,9 @@ struct rte_mem_config {
/* legacy mem and single file segments options are shared */
uint32_t legacy_mem;
uint32_t single_file_segments;
+
+ /* keeps the more restricted dma mask */
+ uint8_t dma_maskbits;
} __attribute__((__packed__));
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 14bd277..c349d6c 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -454,6 +454,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
*/
unsigned rte_memory_get_nrank(void);
+/* check memsegs iovas are within a range based on dma mask */
+int rte_eal_check_dma_mask(uint8_t maskbits);
+
/**
* Drivers based on uio will not load unless physical
* addresses are obtainable. It is only possible to get
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index ac7bbb3..3b5b2b6 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -259,11 +259,13 @@ struct malloc_elem *
int socket, unsigned int flags, size_t align, size_t bound,
bool contig, struct rte_memseg **ms, int n_segs)
{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg_list *msl;
struct malloc_elem *elem = NULL;
size_t alloc_sz;
int allocd_pages;
void *ret, *map_addr;
+ uint64_t mask;
alloc_sz = (size_t)pg_sz * n_segs;
@@ -291,6 +293,16 @@ struct malloc_elem *
goto fail;
}
+ if (mcfg->dma_maskbits) {
+ mask = ~((1ULL << mcfg->dma_maskbits) - 1);
+ if (rte_eal_check_dma_mask(mask)) {
+ RTE_LOG(ERR, EAL,
+ "%s(): couldn't allocate memory due to DMA mask\n",
+ __func__);
+ goto fail;
+ }
+ }
+
/* add newly minted memsegs to malloc heap */
elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 4a55d3b..dfe1b8c 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -263,6 +263,8 @@ enum rte_iova_mode
* processes could later map the config into this exact location */
rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+ rte_config.mem_config->dma_maskbits = 0;
+
}
/* attach to an existing shared memory config */
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 73282bb..2baefce 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -291,6 +291,7 @@ EXPERIMENTAL {
rte_devargs_parsef;
rte_devargs_remove;
rte_devargs_type_count;
+ rte_eal_check_dma_mask;
rte_eal_cleanup;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
--
1.9.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses Alejandro Lucero
@ 2018-10-05 12:15 ` Burakov, Anatoly
2018-10-05 12:19 ` Alejandro Lucero
0 siblings, 1 reply; 15+ messages in thread
From: Burakov, Anatoly @ 2018-10-05 12:15 UTC (permalink / raw)
To: Alejandro Lucero, dev
On 05-Oct-18 1:06 PM, Alejandro Lucero wrote:
> A device can suffer addressing limitations. This function checks
> memsegs have iovas within the supported range based on dma mask.
>
> PMDs should use this function during initialization if device
> suffers addressing limitations, returning an error if this function
> returns memsegs out of range.
>
> Another usage is for emulated IOMMU hardware with addressing
> limitations.
>
> It is necessary to save the most restricted dma mask for checking out
> memory allocated dynamically after initialization.
>
> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
> ---
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses
2018-10-05 12:15 ` Burakov, Anatoly
@ 2018-10-05 12:19 ` Alejandro Lucero
2018-10-05 12:20 ` Burakov, Anatoly
0 siblings, 1 reply; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:19 UTC (permalink / raw)
To: Burakov, Anatoly; +Cc: dev
On Fri, Oct 5, 2018 at 1:15 PM Burakov, Anatoly <anatoly.burakov@intel.com>
wrote:
> On 05-Oct-18 1:06 PM, Alejandro Lucero wrote:
> > A device can suffer addressing limitations. This function checks
> > memsegs have iovas within the supported range based on dma mask.
> >
> > PMDs should use this function during initialization if device
> > suffers addressing limitations, returning an error if this function
> > returns memsegs out of range.
> >
> > Another usage is for emulated IOMMU hardware with addressing
> > limitations.
> >
> > It is necessary to save the most restricted dma mask for checking out
> > memory allocated dynamically after initialization.
> >
> > Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
> > ---
>
> Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
>
>
Thanks Anatoly.
I have just noticed I did not remove the previous code that keeps the dma mask.
I will send another version with just this change and will add your review
tag if you are happy for me to do so.
> --
> Thanks,
> Anatoly
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses
2018-10-05 12:19 ` Alejandro Lucero
@ 2018-10-05 12:20 ` Burakov, Anatoly
0 siblings, 0 replies; 15+ messages in thread
From: Burakov, Anatoly @ 2018-10-05 12:20 UTC (permalink / raw)
To: Alejandro Lucero; +Cc: dev
On 05-Oct-18 1:19 PM, Alejandro Lucero wrote:
>
>
> On Fri, Oct 5, 2018 at 1:15 PM Burakov, Anatoly
> <anatoly.burakov@intel.com <mailto:anatoly.burakov@intel.com>> wrote:
>
> On 05-Oct-18 1:06 PM, Alejandro Lucero wrote:
> > A device can suffer addressing limitations. This function checks
> > memsegs have iovas within the supported range based on dma mask.
> >
> > PMDs should use this function during initialization if device
> > suffers addressing limitations, returning an error if this function
> > returns memsegs out of range.
> >
> > Another usage is for emulated IOMMU hardware with addressing
> > limitations.
> >
> > It is necessary to save the most restricted dma mask for checking out
> > memory allocated dynamically after initialization.
> >
> > Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com
> <mailto:alejandro.lucero@netronome.com>>
> > ---
>
> Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com
> <mailto:anatoly.burakov@intel.com>>
>
>
> Thanks Anatoly.
>
> I have just noticed I did not remove the previous keeping of the dma mask.
>
> I will send another version just with this change and will add you
> review if you are happy if I do so.
>
Sure :)
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 2/6] mem: use address hint for mapping hugepages
2018-10-05 12:06 [dpdk-dev] [PATCH v2 0/6] use IOVAs check based on DMA mask Alejandro Lucero
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses Alejandro Lucero
@ 2018-10-05 12:06 ` Alejandro Lucero
2018-10-05 12:16 ` Burakov, Anatoly
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 3/6] bus/pci: check iommu addressing limitation just once Alejandro Lucero
` (3 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:06 UTC (permalink / raw)
To: dev
The Linux kernel uses a really high address as the starting address for
serving mmap calls. If there are addressing limitations and the
IOVA mode is VA, this starting address is likely too high for
those devices. However, it is possible to use a lower address in
the process virtual address space, as with 64 bits there is a lot
of available space.
This patch adds an address hint as starting address for 64 bits
systems and increments the hint for next invocations. If the mmap
call does not use the hint address, repeat the mmap call using
the hint address incremented by page size.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
lib/librte_eal/common/eal_common_memory.c | 34 ++++++++++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 7555e76..366e454 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -37,6 +37,23 @@
static void *next_baseaddr;
static uint64_t system_page_sz;
+#ifdef RTE_ARCH_64
+/*
+ * Linux kernel uses a really high address as starting address for serving
+ * mmaps calls. If there exists addressing limitations and IOVA mode is VA,
+ * this starting address is likely too high for those devices. However, it
+ * is possible to use a lower address in the process virtual address space
+ * as with 64 bits there is a lot of available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting address
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
+ * hugepages. This is likely enough for most systems, although a device with
+ * addressing limitations should call rte_eal_check_dma_mask for ensuring all
+ * memory is within supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
size_t page_sz, int flags, int mmap_flags)
@@ -60,6 +77,11 @@
rte_eal_process_type() == RTE_PROC_PRIMARY)
next_baseaddr = (void *) internal_config.base_virtaddr;
+#ifdef RTE_ARCH_64
+ if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) baseaddr;
+#endif
if (requested_addr == NULL && next_baseaddr != NULL) {
requested_addr = next_baseaddr;
requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
@@ -91,7 +113,17 @@
mmap_flags, -1, 0);
if (mapped_addr == MAP_FAILED && allow_shrink)
*size -= page_sz;
- } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);
+
+ if (mapped_addr != MAP_FAILED && addr_is_hint &&
+ mapped_addr != requested_addr) {
+ /* hint was not used. Try with another offset */
+ munmap(mapped_addr, map_sz);
+ mapped_addr = MAP_FAILED;
+ next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
+ requested_addr = next_baseaddr;
+ }
+ } while ((allow_shrink || addr_is_hint) &&
+ mapped_addr == MAP_FAILED && *size > 0);
/* align resulting address - if map failed, we will ignore the value
* anyway, so no need to add additional checks.
--
1.9.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/6] mem: use address hint for mapping hugepages
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 2/6] mem: use address hint for mapping hugepages Alejandro Lucero
@ 2018-10-05 12:16 ` Burakov, Anatoly
0 siblings, 0 replies; 15+ messages in thread
From: Burakov, Anatoly @ 2018-10-05 12:16 UTC (permalink / raw)
To: Alejandro Lucero, dev
On 05-Oct-18 1:06 PM, Alejandro Lucero wrote:
> Linux kernel uses a really high address as starting address for
> serving mmaps calls. If there exist addressing limitations and
> IOVA mode is VA, this starting address is likely too high for
> those devices. However, it is possible to use a lower address in
> the process virtual address space as with 64 bits there is a lot
> of available space.
>
> This patch adds an address hint as starting address for 64 bits
> systems and increments the hint for next invocations. If the mmap
> call does not use the hint address, repeat the mmap call using
> the hint address incremented by page size.
>
> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
> ---
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 3/6] bus/pci: check iommu addressing limitation just once
2018-10-05 12:06 [dpdk-dev] [PATCH v2 0/6] use IOVAs check based on DMA mask Alejandro Lucero
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 1/6] mem: add function for checking memsegs IOVAs addresses Alejandro Lucero
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 2/6] mem: use address hint for mapping hugepages Alejandro Lucero
@ 2018-10-05 12:06 ` Alejandro Lucero
2018-10-05 12:16 ` Burakov, Anatoly
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 4/6] bus/pci: use IOVAs dma mask check when setting IOVA mode Alejandro Lucero
` (2 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:06 UTC (permalink / raw)
To: dev
The current code checks if the IOMMU hardware reports enough addressing
bits for using IOVA mode, but it repeats the same check for every
PCI device present. This is not necessary because the IOMMU hardware
is the same for all of them.
This patch only checks the IOMMU using the first PCI device found.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
drivers/bus/pci/linux/pci.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 04648ac..a871549 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -620,8 +620,11 @@
FOREACH_DEVICE_ON_PCIBUS(dev) {
if (!rte_pci_match(drv, dev))
continue;
- if (!pci_one_device_iommu_support_va(dev))
- return false;
+ /*
+ * just one PCI device needs to be checked out because
+ * the IOMMU hardware is the same for all of them.
+ */
+ return pci_one_device_iommu_support_va(dev);
}
}
return true;
--
1.9.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 4/6] bus/pci: use IOVAs dma mask check when setting IOVA mode
2018-10-05 12:06 [dpdk-dev] [PATCH v2 0/6] use IOVAs check based on DMA mask Alejandro Lucero
` (2 preceding siblings ...)
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 3/6] bus/pci: check iommu addressing limitation just once Alejandro Lucero
@ 2018-10-05 12:06 ` Alejandro Lucero
2018-10-05 12:18 ` Burakov, Anatoly
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 5/6] net/nfp: check hugepages IOVAs based on DMA mask Alejandro Lucero
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 6/6] net/nfp: support IOVA VA mode Alejandro Lucero
5 siblings, 1 reply; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:06 UTC (permalink / raw)
To: dev
Currently the code precludes IOVA mode if the IOMMU hardware reports
fewer addressing bits than necessary for the full virtual memory range.
Although VT-d emulation currently only supports 39 bits, the IOVAs of
the allocated memory could still be within that supported range.
This patch allows IOVA mode in such a case by adding a call to
rte_eal_check_dma_mask using the addressing bits reported by the
IOMMU hardware.
Indeed, memory initialization code has been modified for using lower
virtual addresses than those used by the kernel for 64 bits processes
by default, and therefore memsegs iovas can use 39 bits or less for
most systems. And this is likely 100% true for VMs.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
drivers/bus/pci/linux/pci.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index a871549..5cf78d7 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -588,10 +588,8 @@
fclose(fp);
mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
- if (mgaw < X86_VA_WIDTH)
- return false;
- return true;
+ return rte_eal_check_dma_mask(mgaw) == 0 ? true : false;
}
#elif defined(RTE_ARCH_PPC_64)
static bool
--
1.9.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/6] bus/pci: use IOVAs dma mask check when setting IOVA mode
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 4/6] bus/pci: use IOVAs dma mask check when setting IOVA mode Alejandro Lucero
@ 2018-10-05 12:18 ` Burakov, Anatoly
0 siblings, 0 replies; 15+ messages in thread
From: Burakov, Anatoly @ 2018-10-05 12:18 UTC (permalink / raw)
To: Alejandro Lucero, dev
On 05-Oct-18 1:06 PM, Alejandro Lucero wrote:
> Currently the code precludes IOVA mode if IOMMU hardware reports
> less addressing bits than necessary for full virtual memory range.
>
> Although VT-d emulation currently only supports 39 bits, it could
> be iovas for allocated memlory being within that supported range.
^^ memory
> This patch allows IOVA mode in such a case adding a call to
> rte_eal_check_dma_mask using the reported addressing bits by the
> IOMMU hardware.
>
> Indeed, memory initialization code has been modified for using lower
> virtual addresses than those used by the kernel for 64 bits processes
> by default, and therefore memsegs iovas can use 39 bits or less for
> most systems. And this is likely 100% true for VMs.
>
> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
> ---
> drivers/bus/pci/linux/pci.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index a871549..5cf78d7 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -588,10 +588,8 @@
> fclose(fp);
>
> mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
> - if (mgaw < X86_VA_WIDTH)
> - return false;
>
> - return true;
> + return rte_eal_check_dma_mask(mgaw) == 0 ? true : false;
Still looks weird but OK, no big deal :)
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
> }
> #elif defined(RTE_ARCH_PPC_64)
> static bool
>
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 5/6] net/nfp: check hugepages IOVAs based on DMA mask
2018-10-05 12:06 [dpdk-dev] [PATCH v2 0/6] use IOVAs check based on DMA mask Alejandro Lucero
` (3 preceding siblings ...)
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 4/6] bus/pci: use IOVAs dma mask check when setting IOVA mode Alejandro Lucero
@ 2018-10-05 12:06 ` Alejandro Lucero
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 6/6] net/nfp: support IOVA VA mode Alejandro Lucero
5 siblings, 0 replies; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:06 UTC (permalink / raw)
To: dev
NFP devices can not handle DMA addresses requiring more than
40 bits. This patch uses rte_eal_check_dma_mask with 40 bits
and avoids device initialization if memory is out of the NFP range.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
drivers/net/nfp/nfp_net.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 170b5d6..3910980 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -2680,6 +2680,14 @@ uint32_t nfp_net_txq_full(struct nfp_net_txq *txq)
pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+ /* NFP can not handle DMA addresses requiring more than 40 bits */
+ if (rte_eal_check_dma_mask(40)) {
+ RTE_LOG(ERR, PMD, "device %s can not be used:",
+ pci_dev->device.name);
+ RTE_LOG(ERR, PMD, "\trestricted dma mask to 40 bits!\n");
+ return -ENODEV;
+ };
+
if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
(pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
port = get_pf_port_number(eth_dev->data->name);
--
1.9.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* [dpdk-dev] [PATCH v2 6/6] net/nfp: support IOVA VA mode
2018-10-05 12:06 [dpdk-dev] [PATCH v2 0/6] use IOVAs check based on DMA mask Alejandro Lucero
` (4 preceding siblings ...)
2018-10-05 12:06 ` [dpdk-dev] [PATCH v2 5/6] net/nfp: check hugepages IOVAs based on DMA mask Alejandro Lucero
@ 2018-10-05 12:06 ` Alejandro Lucero
5 siblings, 0 replies; 15+ messages in thread
From: Alejandro Lucero @ 2018-10-05 12:06 UTC (permalink / raw)
To: dev
NFP can handle IOVA as VA. It requires checking that those IOVAs
are within the supported range, which is done during initialization.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
drivers/net/nfp/nfp_net.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 3910980..cc73b0b 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -3268,14 +3268,16 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
static struct rte_pci_driver rte_nfp_net_pf_pmd = {
.id_table = pci_id_nfp_pf_net_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = nfp_pf_pci_probe,
.remove = eth_nfp_pci_remove,
};
static struct rte_pci_driver rte_nfp_net_vf_pmd = {
.id_table = pci_id_nfp_vf_net_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_nfp_pci_probe,
.remove = eth_nfp_pci_remove,
};
--
1.9.1
^ permalink raw reply [flat|nested] 15+ messages in thread