DPDK patches and discussions
 help / color / mirror / Atom feed
From: Bruce Richardson <bruce.richardson@intel.com>
To: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages
Date: Thu, 13 Nov 2014 13:46:55 +0000
Message-ID: <20141113134655.GA8224@bricha3-MOBL3> (raw)
In-Reply-To: <1415700565-19157-1-git-send-email-anatoly.burakov@intel.com>

On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote:
> Multi-process DPDK applications must mmap hugepages and PCI resources
> into the same virtual address space. By default the virtual addresses
> are chosen by the primary process automatically when calling mmap.
> But sometimes the chosen virtual addresses aren't usable in a secondary
> process - for example, when the secondary process is linked with more
> libraries than the primary process, and one of those libraries occupies
> the same address space that the primary process has requested for PCI
> mappings.
> 
> This patch makes EAL try to map PCI BARs right after the hugepages
> (instead of at a location chosen by mmap) in virtual memory, so that PCI
> BARs have less chance of ending up in random places in virtual memory.
> 
> Signed-off-by: Liang Xu <liang.xu@cinfotech.cn>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

> ---
>  lib/librte_eal/linuxapp/eal/eal_pci.c              | 30 ++++++++++++++++------
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 13 ++++++++--
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 19 +++++++++++---
>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +++++
>  4 files changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 5fe3961..79fbbb8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -97,6 +97,25 @@ error:
>  	return -1;
>  }
>  
> +void *
> +pci_find_max_end_va(void)
> +{
> +	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
> +	const struct rte_memseg *last = seg;
> +	unsigned i = 0;
> +
> +	for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
> +		if (seg->addr == NULL)
> +			break;
> +
> +		if (seg->addr > last->addr)
> +			last = seg;
> +
> +	}
> +	return RTE_PTR_ADD(last->addr, last->len);
> +}
> +
> +
>  /* map a particular resource from a file */
>  void *
>  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
> @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
>  	/* Map the PCI memory resource of device */
>  	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>  			MAP_SHARED, fd, offset);
> -	if (mapaddr == MAP_FAILED ||
> -			(requested_addr != NULL && mapaddr != requested_addr)) {
> +	if (mapaddr == MAP_FAILED) {
>  		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
>  			__func__, fd, requested_addr,
>  			(unsigned long)size, (unsigned long)offset,
>  			strerror(errno), mapaddr);
> -		goto fail;
> +	} else {
> +		RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
>  	}
>  
> -	RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
> -
>  	return mapaddr;
> -
> -fail:
> -	return NULL;
>  }
>  
>  /* parse the "resource" sysfs file */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 7e62266..e53f06b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -35,6 +35,7 @@
>  #include <fcntl.h>
>  #include <dirent.h>
>  #include <sys/stat.h>
> +#include <sys/mman.h>
>  
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -48,6 +49,8 @@
>  
>  static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
>  
> +void *pci_map_addr = NULL;
> +
>  
>  #define OFF_MAX              ((uint64_t)(off_t)-1)
>  static int
> @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>  			if (maps[j].addr != NULL)
>  				fail = 1;
>  			else {
> -				mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
> +				/* try mapping somewhere close to the end of hugepages */
> +				if (pci_map_addr == NULL)
> +					pci_map_addr = pci_find_max_end_va();
> +
> +				mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
>  						(size_t)maps[j].size);
> -				if (mapaddr == NULL)
> +				if (mapaddr == MAP_FAILED)
>  					fail = 1;
> +
> +				pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size);
>  			}
>  
>  			if (fail) {
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..c1246e8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -37,6 +37,7 @@
>  #include <sys/eventfd.h>
>  #include <sys/socket.h>
>  #include <sys/ioctl.h>
> +#include <sys/mman.h>
>  
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>  		if (i == msix_bar)
>  			continue;
>  
> -		bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
> -				reg.size);
> +		if (internal_config.process_type == RTE_PROC_PRIMARY) {
> +			/* try mapping somewhere close to the end of hugepages */
> +			if (pci_map_addr == NULL)
> +				pci_map_addr = pci_find_max_end_va();
> +
> +			bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
> +					reg.size);
> +			pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
> +		} else {
> +			bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
> +					reg.size);
> +		}
>  
> -		if (bar_addr == NULL) {
> +		if (bar_addr == MAP_FAILED ||
> +				(internal_config.process_type == RTE_PROC_SECONDARY &&
> +						bar_addr != maps[i].addr)) {
>  			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", pci_addr, i,
>  					strerror(errno));
>  			close(vfio_dev_fd);
> diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> index d758bee..1070eb8 100644
> --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> @@ -59,6 +59,12 @@ struct mapped_pci_resource {
>  TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
>  extern struct mapped_pci_res_list *pci_res_list;
>  
> +/*
> + * Helper function to map PCI resources right after hugepages in virtual memory
> + */
> +extern void *pci_map_addr;
> +void *pci_find_max_end_va(void);
> +
>  void *pci_map_resource(void *requested_addr, int fd, off_t offset,
>  		size_t size);
>  
> -- 
> 1.8.1.4
> 

  parent reply	other threads:[~2014-11-13 13:37 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-05 13:25 [dpdk-dev] [PATCH] eal: map uio resources after hugepages when the base_virtaddr is configured lxu
2014-11-05 15:10 ` Burakov, Anatoly
2014-11-05 15:49 ` [dpdk-dev] 答复: " XU Liang
2014-11-05 15:59   ` Burakov, Anatoly
2014-11-05 16:10   ` [dpdk-dev] 答复:答复: " XU Liang
2014-11-26  1:46     ` Qiu, Michael
2014-11-26  9:58       ` Burakov, Anatoly
2014-11-06 14:11 ` [dpdk-dev] [PATCH v2] " lxu
2014-11-06 14:27   ` Burakov, Anatoly
2014-11-06 14:48   ` [dpdk-dev] 答复:[PATCH " 徐亮
2014-11-06 14:47 ` [dpdk-dev] [PATCH v3] " lxu
2014-11-06 15:06   ` De Lara Guarch, Pablo
2014-11-06 15:07 ` [dpdk-dev] [PATCH v4] " lxu
2014-11-06 15:12   ` Thomas Monjalon
2014-11-06 15:11 ` lxu
2014-11-06 15:32 ` [dpdk-dev] [PATCH v5] " lxu
2014-11-06 15:41   ` Burakov, Anatoly
2014-11-06 15:58     ` Thomas Monjalon
2014-11-06 16:10       ` Burakov, Anatoly
2014-11-06 17:30         ` Bruce Richardson
2014-11-07  8:01 ` [dpdk-dev] [PATCH v6] " lxu
2014-11-07  9:42   ` Bruce Richardson
2014-11-07  9:47   ` Burakov, Anatoly
2014-11-07  9:57   ` XU Liang
2014-11-07 14:37     ` XU Liang
2014-11-10 11:34   ` [dpdk-dev] [PATCH v7] eal: map PCI memory resources after hugepages Anatoly Burakov
2014-11-10 13:33     ` Burakov, Anatoly
2014-11-11  3:53     ` XU Liang
2014-11-11 10:09     ` [dpdk-dev] [PATCH v8] " Anatoly Burakov
2014-11-13 11:34       ` Burakov, Anatoly
2014-11-13 12:58         ` Bruce Richardson
2014-11-13 13:44           ` Burakov, Anatoly
2014-11-13 13:46       ` Bruce Richardson [this message]
2014-11-25 17:17         ` Thomas Monjalon
2014-11-07 14:57 ` [dpdk-dev] [PATCH v7] eal: map uio " lxu
2014-11-07 15:14   ` Burakov, Anatoly
2014-11-07 15:15   ` Thomas Monjalon
2014-11-07 15:19   ` XU Liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141113134655.GA8224@bricha3-MOBL3 \
    --to=bruce.richardson@intel.com \
    --cc=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git