DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
To: Jonas Pfefferle <jpf@zurich.ibm.com>, dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] vfio: refactor PCI BAR mapping
Date: Tue, 19 Sep 2017 12:40:51 +0100	[thread overview]
Message-ID: <8c14c677-6341-6b3c-aa20-36a9297b1f94@intel.com> (raw)
In-Reply-To: <1502969759-11036-1-git-send-email-jpf@zurich.ibm.com>

Hi Jonas,

On 17-Aug-17 12:35 PM, Jonas Pfefferle wrote:
> Split pci_vfio_map_resource for primary and secondary processes.
> Save all relevant mapping data in primary process to allow
> the secondary process to perform mappings.
> 
> Signed-off-by: Jonas Pfefferle <jpf at zurich.ibm.com>
> ---
>   lib/librte_eal/common/include/rte_pci.h    |   7 +
>   lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 447 +++++++++++++++++------------
>   2 files changed, 271 insertions(+), 183 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> index 8b12339..0821af9 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -214,6 +214,12 @@ struct pci_map {
>   	uint64_t phaddr;
>   };
>   
> +struct pci_msix_table {
> +	int bar_index;
> +	uint32_t offset;
> +	uint32_t size;
> +};
> +
>   /**
>    * A structure describing a mapped PCI resource.
>    * For multi-process we need to reproduce all PCI mappings in secondary
> @@ -226,6 +232,7 @@ struct mapped_pci_resource {
>   	char path[PATH_MAX];
>   	int nb_maps;
>   	struct pci_map maps[PCI_MAX_RESOURCE];
> +	struct pci_msix_table msix_table;
>   };
>   
>   /** mapped pci device list */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index aa9d96e..f37552a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -88,8 +88,7 @@ pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
>   
>   /* get PCI BAR number where MSI-X interrupts are */
>   static int
> -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
> -		      uint32_t *msix_table_size)
> +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
>   {
>   	int ret;
>   	uint32_t reg;
> @@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
>   				return -1;
>   			}
>   
> -			*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
> -			*msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> -			*msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> +			msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> +			msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> +			msix_table->size =
> +				16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
>   
>   			return 0;
>   		}
> @@ -300,25 +300,152 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
>   	return -1;
>   }
>   
> -/*
> - * map the PCI resources of a PCI device in virtual memory (VFIO version).
> - * primary and secondary processes follow almost exactly the same path
> - */
> -int
> -pci_vfio_map_resource(struct rte_pci_device *dev)
> +static int
> +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
> +{
> +	uint32_t ioport_bar;
> +	int ret;
> +
> +	ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
> +			  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
> +			  + PCI_BASE_ADDRESS_0 + bar_index*4);
> +	if (ret != sizeof(ioport_bar)) {
> +		RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
> +			PCI_BASE_ADDRESS_0 + bar_index*4);
> +		return -1;
> +	}
> +
> +	if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
> +		RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d) addr: %x\n",
> +			 bar_index, ioport_bar);

This log message should probably go to the "continue" portion of the 
calling code, it looks out of place here.

> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static int
> +pci_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
> +{
> +	if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
> +		RTE_LOG(ERR, EAL, "Error setting up interrupts!\n");
> +		return -1;
> +	}
> +
> +	/* set bus mastering for the device */
> +	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
> +		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
> +		return -1;
> +	}
> +
> +	/* Reset the device */
> +	ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
> +
> +	return 0;
> +}
> +
> +static int
> +pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
> +		int bar_index, int additional_flags)
> +{
> +	struct memreg {
> +		unsigned long offset, size;
> +	} memreg[2] = {};
> +	void *bar_addr;
> +	struct pci_msix_table *msix_table = &vfio_res->msix_table;
> +	struct pci_map *bar = &vfio_res->maps[bar_index];
> +
> +	if (bar->size == 0)
> +		/* Skip this BAR */
> +		return 0;
> +
> +	if (msix_table->bar_index == bar_index) {
> +		/*
> +		 * VFIO will not let us map the MSI-X table,
> +		 * but we can map around it.
> +		 */
> +		uint32_t table_start = msix_table->offset;
> +		uint32_t table_end = table_start + msix_table->size;
> +		table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
> +		table_start &= PAGE_MASK;
> +
> +		if (table_start == 0 && table_end >= bar->size) {
> +			/* Cannot map this BAR */
> +			RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index);
> +			bar->size = 0;
> +			bar->addr = 0;
> +			return 0;
> +		}
> +
> +		memreg[0].offset = bar->offset;
> +		memreg[0].size = table_start;
> +		memreg[1].offset = bar->offset + table_end;
> +		memreg[1].size = bar->size - table_end;
> +
> +		RTE_LOG(DEBUG, EAL,
> +			"Trying to map BAR%d that contains the MSI-X "
> +			"table. Trying offsets: "
> +			"0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", bar_index,
> +			memreg[0].offset, memreg[0].size,
> +			memreg[1].offset, memreg[1].size);
> +	}

I believe you forgot the "else" part. memreg is, by default, initialized 
to zeroes, and if bar_index is not equal to MSI-X bar index, memreg does 
not get filled with any values, and therefore all of the following 
checks for memreg.size etc. will return false and you'll end up with 
failed BAR mappings.

Confirmed with testing:

EAL: PCI device 0000:08:00.0 on NUMA socket 0
EAL:   probe driver: 8086:10fb net_ixgbe
EAL:   using IOMMU type 1 (Type 1)
EAL: Failed to map pci BAR0
EAL:   0000:08:00.0 mapping BAR0 failed: Success
EAL: Requested device 0000:08:00.0 cannot be used

--
Thanks,
Anatoly

  reply	other threads:[~2017-09-19 11:41 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-17 11:35 Jonas Pfefferle
2017-09-19 11:40 ` Burakov, Anatoly [this message]
2017-09-20 14:38   ` Jonas Pfefferle1

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8c14c677-6341-6b3c-aa20-36a9297b1f94@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    --cc=jpf@zurich.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).