From: Bruce Richardson <bruce.richardson@intel.com>
To: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages
Date: Thu, 13 Nov 2014 13:46:55 +0000 [thread overview]
Message-ID: <20141113134655.GA8224@bricha3-MOBL3> (raw)
In-Reply-To: <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com>
On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote:
> A multi-process DPDK application must mmap hugepages and PCI resources
> into the same virtual address space. By default, the virtual addresses
> are chosen automatically by the primary process when it calls mmap().
> But sometimes the chosen virtual addresses aren't usable in a secondary
> process - for example, when the secondary process is linked with more
> libraries than the primary process, and one of those libraries occupies
> the same address space that the primary process has requested for PCI
> mappings.
>
> This patch makes EAL try to map PCI BARs right after the hugepages in
> virtual memory (instead of at a location chosen by mmap), so that PCI
> BARs have less chance of ending up in random places in virtual memory.
>
> Signed-off-by: Liang Xu <liang.xu@cinfotech.cn>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> lib/librte_eal/linuxapp/eal/eal_pci.c | 30 ++++++++++++++++------
> lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 13 ++++++++--
> lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++++++++++---
> lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 6 +++++
> 4 files changed, 55 insertions(+), 13 deletions(-)
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 5fe3961..79fbbb8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -97,6 +97,25 @@ error:
> return -1;
> }
>
> +void *
> +pci_find_max_end_va(void)
> +{
> + const struct rte_memseg *seg = rte_eal_get_physmem_layout();
> + const struct rte_memseg *last = seg;
> + unsigned i = 0;
> +
> + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
> + if (seg->addr == NULL)
> + break;
> +
> + if (seg->addr > last->addr)
> + last = seg;
> +
> + }
> + return RTE_PTR_ADD(last->addr, last->len);
> +}
> +
> +
> /* map a particular resource from a file */
> void *
> pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
> @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
> /* Map the PCI memory resource of device */
> mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
> MAP_SHARED, fd, offset);
> - if (mapaddr == MAP_FAILED ||
> - (requested_addr != NULL && mapaddr != requested_addr)) {
> + if (mapaddr == MAP_FAILED) {
> RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
> __func__, fd, requested_addr,
> (unsigned long)size, (unsigned long)offset,
> strerror(errno), mapaddr);
> - goto fail;
> + } else {
> + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
> }
>
> - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
> -
> return mapaddr;
> -
> -fail:
> - return NULL;
> }
>
> /* parse the "resource" sysfs file */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 7e62266..e53f06b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -35,6 +35,7 @@
> #include <fcntl.h>
> #include <dirent.h>
> #include <sys/stat.h>
> +#include <sys/mman.h>
>
> #include <rte_log.h>
> #include <rte_pci.h>
> @@ -48,6 +49,8 @@
>
> static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
>
> +void *pci_map_addr = NULL;
> +
>
> #define OFF_MAX ((uint64_t)(off_t)-1)
> static int
> @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
> if (maps[j].addr != NULL)
> fail = 1;
> else {
> - mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
> + /* try mapping somewhere close to the end of hugepages */
> + if (pci_map_addr == NULL)
> + pci_map_addr = pci_find_max_end_va();
> +
> + mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
> (size_t)maps[j].size);
> - if (mapaddr == NULL)
> + if (mapaddr == MAP_FAILED)
> fail = 1;
> +
> + pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size);
> }
>
> if (fail) {
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..c1246e8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -37,6 +37,7 @@
> #include <sys/eventfd.h>
> #include <sys/socket.h>
> #include <sys/ioctl.h>
> +#include <sys/mman.h>
>
> #include <rte_log.h>
> #include <rte_pci.h>
> @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
> if (i == msix_bar)
> continue;
>
> - bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
> - reg.size);
> + if (internal_config.process_type == RTE_PROC_PRIMARY) {
> + /* try mapping somewhere close to the end of hugepages */
> + if (pci_map_addr == NULL)
> + pci_map_addr = pci_find_max_end_va();
> +
> + bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
> + reg.size);
> + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
> + } else {
> + bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
> + reg.size);
> + }
>
> - if (bar_addr == NULL) {
> + if (bar_addr == MAP_FAILED ||
> + (internal_config.process_type == RTE_PROC_SECONDARY &&
> + bar_addr != maps[i].addr)) {
> RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i,
> strerror(errno));
> close(vfio_dev_fd);
> diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> index d758bee..1070eb8 100644
> --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> @@ -59,6 +59,12 @@ struct mapped_pci_resource {
> TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
> extern struct mapped_pci_res_list *pci_res_list;
>
> +/*
> + * Helper function to map PCI resources right after hugepages in virtual memory
> + */
> +extern void *pci_map_addr;
> +void *pci_find_max_end_va(void);
> +
> void *pci_map_resource(void *requested_addr, int fd, off_t offset,
> size_t size);
>
> --
> 1.8.1.4
>
next prev parent reply other threads:[~2014-11-13 13:37 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-05 13:25 [dpdk-dev] [PATCH] eal: map uio resources after hugepages when the base_virtaddr is configured lxu
2014-11-05 15:10 ` Burakov, Anatoly
2014-11-05 15:49 ` [dpdk-dev] 答复: " XU Liang
2014-11-05 15:59 ` Burakov, Anatoly
2014-11-05 16:10 ` [dpdk-dev] 答复:答复: " XU Liang
2014-11-26 1:46 ` Qiu, Michael
2014-11-26 9:58 ` Burakov, Anatoly
2014-11-06 14:11 ` [dpdk-dev] [PATCH v2] " lxu
2014-11-06 14:27 ` Burakov, Anatoly
2014-11-06 14:48 ` [dpdk-dev] 答复:[PATCH " 徐亮
2014-11-06 14:47 ` [dpdk-dev] [PATCH v3] " lxu
2014-11-06 15:06 ` De Lara Guarch, Pablo
2014-11-06 15:07 ` [dpdk-dev] [PATCH v4] " lxu
2014-11-06 15:12 ` Thomas Monjalon
2014-11-06 15:11 ` lxu
2014-11-06 15:32 ` [dpdk-dev] [PATCH v5] " lxu
2014-11-06 15:41 ` Burakov, Anatoly
2014-11-06 15:58 ` Thomas Monjalon
2014-11-06 16:10 ` Burakov, Anatoly
2014-11-06 17:30 ` Bruce Richardson
2014-11-07 8:01 ` [dpdk-dev] [PATCH v6] " lxu
2014-11-07 9:42 ` Bruce Richardson
2014-11-07 9:47 ` Burakov, Anatoly
2014-11-07 9:57 ` XU Liang
2014-11-07 14:37 ` XU Liang
2014-11-10 11:34 ` [dpdk-dev] [PATCH v7] eal: map PCI memory resources after hugepages Anatoly Burakov
2014-11-10 13:33 ` Burakov, Anatoly
2014-11-11 3:53 ` XU Liang
2014-11-11 10:09 ` [dpdk-dev] [PATCH v8] " Anatoly Burakov
2014-11-13 11:34 ` Burakov, Anatoly
2014-11-13 12:58 ` Bruce Richardson
2014-11-13 13:44 ` Burakov, Anatoly
2014-11-13 13:46 ` Bruce Richardson [this message]
2014-11-25 17:17 ` Thomas Monjalon
2014-11-07 14:57 ` [dpdk-dev] [PATCH v7] eal: map uio " lxu
2014-11-07 15:14 ` Burakov, Anatoly
2014-11-07 15:15 ` Thomas Monjalon
2014-11-07 15:19 ` XU Liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141113134655.GA8224@bricha3-MOBL3 \
--to=bruce.richardson@intel.com \
--cc=anatoly.burakov@intel.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).