DPDK patches and discussions
 help / color / mirror / Atom feed
From: Tiwei Bie <tiwei.bie@intel.com>
To: dev@dpdk.org
Cc: anatoly.burakov@intel.com
Subject: [dpdk-dev] [RFC PATCH] bus/pci: avoid depending on private value in kernel source
Date: Wed,  3 Jul 2019 13:45:08 +0800	[thread overview]
Message-ID: <20190703054508.22824-1-tiwei.bie@intel.com> (raw)

The value 40 used in VFIO_GET_REGION_ADDR() is a private value
(VFIO_PCI_OFFSET_SHIFT) defined in Linux kernel source [1]. It
is not part of VFIO API, and we should not depend on it.

[1] https://github.com/torvalds/linux/blob/6fbc7275c7a9/drivers/vfio/pci/vfio_pci_private.h#L19

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/bus/pci/linux/pci.c      |  18 +++-
 drivers/bus/pci/linux/pci_init.h |   6 +-
 drivers/bus/pci/linux/pci_vfio.c | 157 ++++++++++++++++++++++++-------
 drivers/bus/pci/rte_bus_pci.h    |  12 +++
 lib/librte_eal/linux/eal/eal.c   |  17 ++--
 5 files changed, 163 insertions(+), 47 deletions(-)

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index b931cf9d1..13fbdb018 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -337,6 +337,20 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
 	} else
 		dev->kdrv = RTE_KDRV_NONE;
 
+	switch (dev->kdrv) {
+	case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+		ret = pci_vfio_fill_regions(dev);
+		if (ret != 0) {
+			free(dev);
+			return 1;
+		}
+#endif
+		break;
+	default:
+		break;
+	}
+
 	/* device is valid, add in list (sorted) */
 	if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
 		rte_pci_add_device(dev);
@@ -721,7 +735,7 @@ int rte_pci_read_config(const struct rte_pci_device *device,
 		return pci_uio_read_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
 	case RTE_KDRV_VFIO:
-		return pci_vfio_read_config(intr_handle, buf, len, offset);
+		return pci_vfio_read_config(device, buf, len, offset);
 #endif
 	default:
 		rte_pci_device_name(&device->addr, devname,
@@ -745,7 +759,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 		return pci_uio_write_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
 	case RTE_KDRV_VFIO:
-		return pci_vfio_write_config(intr_handle, buf, len, offset);
+		return pci_vfio_write_config(device, buf, len, offset);
 #endif
 	default:
 		rte_pci_device_name(&device->addr, devname,
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index c2e603a37..a8399f39b 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -64,9 +64,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
 #endif
 
 /* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_read_config(const struct rte_pci_device *dev,
 			 void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_write_config(const struct rte_pci_device *dev,
 			  const void *buf, size_t len, off_t offs);
 
 int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
@@ -83,6 +83,8 @@ int pci_vfio_unmap_resource(struct rte_pci_device *dev);
 
 int pci_vfio_is_enabled(void);
 
+int pci_vfio_fill_regions(struct rte_pci_device *dev);
+
 #endif
 
 #endif /* EAL_PCI_INIT_H_ */
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index ebf6ccd3c..06db8679c 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -48,35 +48,111 @@ static struct rte_tailq_elem rte_vfio_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_vfio_tailq)
 
+static int pci_vfio_get_region_info(int vfio_dev_fd,
+		struct vfio_region_info **info, int region);
+
+int pci_vfio_fill_regions(struct rte_pci_device *dev)
+{
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	struct vfio_region_info *reg = NULL;
+	int vfio_dev_fd, nb_maps;
+	int i, ret;
+
+	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), dev->name,
+					&vfio_dev_fd, &device_info);
+	if (ret != 0)
+		return -1;
+
+	nb_maps = RTE_MIN((int)device_info.num_regions,
+			VFIO_PCI_CONFIG_REGION_INDEX + 1);
+
+	for (i = 0; i < nb_maps; i++) {
+		ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+		if (ret < 0) {
+			RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n",
+				dev->name, errno, strerror(errno));
+			goto out;
+		}
+
+		dev->region[i].size = reg->size;
+		dev->region[i].offset = reg->offset;
+
+		free(reg);
+	}
+
+out:
+	rte_vfio_release_device(rte_pci_get_sysfs_path(), dev->name,
+				vfio_dev_fd);
+	return ret;
+}
+
+static int
+pci_vfio_get_region(const struct rte_pci_device *dev, int index,
+			 uint64_t *size, uint64_t *offset)
+{
+	if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS)
+		return -1;
+
+	if (dev->region[index].size == 0 && dev->region[index].offset == 0)
+		return -1;
+
+	*size   = dev->region[index].size;
+	*offset = dev->region[index].offset;
+
+	return 0;
+}
+
 int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_read_config(const struct rte_pci_device *dev,
 		    void *buf, size_t len, off_t offs)
 {
-	return pread64(intr_handle->vfio_dev_fd, buf, len,
-	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+	uint64_t size, offset;
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0)
+		return -1;
+
+	if ((uint64_t)len + offs > size)
+		return -1;
+
+	return pread64(dev->intr_handle.vfio_dev_fd, buf, len, offset + offs);
 }
 
 int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_write_config(const struct rte_pci_device *dev,
 		    const void *buf, size_t len, off_t offs)
 {
-	return pwrite64(intr_handle->vfio_dev_fd, buf, len,
-	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+	uint64_t size, offset;
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0)
+		return -1;
+
+	if ((uint64_t)len + offs > size)
+		return -1;
+
+	return pwrite64(dev->intr_handle.vfio_dev_fd, buf, len, offset + offs);
 }
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
+pci_vfio_get_msix_bar(const struct rte_pci_device *dev, int fd,
+		      struct pci_msix_table *msix_table)
 {
 	int ret;
 	uint32_t reg;
 	uint16_t flags;
 	uint8_t cap_id, cap_offset;
+	uint64_t size, offset;
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
 
 	/* read PCI capability pointer from config space */
-	ret = pread64(fd, &reg, sizeof(reg),
-			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-			PCI_CAPABILITY_LIST);
+	ret = pread64(fd, &reg, sizeof(reg), offset + PCI_CAPABILITY_LIST);
 	if (ret != sizeof(reg)) {
 		RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
 				"config space!\n");
@@ -89,9 +165,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 	while (cap_offset) {
 
 		/* read PCI capability ID */
-		ret = pread64(fd, &reg, sizeof(reg),
-				VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-				cap_offset);
+		ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset);
 		if (ret != sizeof(reg)) {
 			RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
 					"config space!\n");
@@ -104,8 +178,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 		/* if we haven't reached MSI-X, check next capability */
 		if (cap_id != PCI_CAP_ID_MSIX) {
 			ret = pread64(fd, &reg, sizeof(reg),
-					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-					cap_offset);
+					offset + cap_offset);
 			if (ret != sizeof(reg)) {
 				RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
 						"config space!\n");
@@ -121,8 +194,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 		else {
 			/* table offset resides in the next 4 bytes */
 			ret = pread64(fd, &reg, sizeof(reg),
-					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-					cap_offset + 4);
+					offset + cap_offset + 4);
 			if (ret != sizeof(reg)) {
 				RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
 						"space!\n");
@@ -130,8 +202,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 			}
 
 			ret = pread64(fd, &flags, sizeof(flags),
-					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-					cap_offset + 2);
+					offset + cap_offset + 2);
 			if (ret != sizeof(flags)) {
 				RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
 						"space!\n");
@@ -151,14 +222,19 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 
 /* set PCI bus mastering */
 static int
-pci_vfio_set_bus_master(int dev_fd, bool op)
+pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op)
 {
+	uint64_t size, offset;
 	uint16_t reg;
 	int ret;
 
-	ret = pread64(dev_fd, &reg, sizeof(reg),
-			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-			PCI_COMMAND);
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
+
+	ret = pread64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND);
 	if (ret != sizeof(reg)) {
 		RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
 		return -1;
@@ -170,10 +246,7 @@ pci_vfio_set_bus_master(int dev_fd, bool op)
 	else
 		reg &= ~(PCI_COMMAND_MASTER);
 
-	ret = pwrite64(dev_fd, &reg, sizeof(reg),
-			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
-			PCI_COMMAND);
-
+	ret = pwrite64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND);
 	if (ret != sizeof(reg)) {
 		RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
 		return -1;
@@ -402,14 +475,21 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev)
 #endif
 
 static int
-pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
+pci_vfio_is_ioport_bar(const struct rte_pci_device *dev,
+		       int vfio_dev_fd, int bar_index)
 {
+	uint64_t size, offset;
 	uint32_t ioport_bar;
 	int ret;
 
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
+
 	ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
-			  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
-			  + PCI_BASE_ADDRESS_0 + bar_index*4);
+			  offset + PCI_BASE_ADDRESS_0 + bar_index*4);
 	if (ret != sizeof(ioport_bar)) {
 		RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
 			PCI_BASE_ADDRESS_0 + bar_index*4);
@@ -428,7 +508,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
 	}
 
 	/* set bus mastering for the device */
-	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+	if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) {
 		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
 		return -1;
 	}
@@ -691,7 +771,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 	/* get MSI-X BAR, if any (we have to know where it is because we can't
 	 * easily mmap it when using VFIO)
 	 */
-	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table);
+	ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table);
 	if (ret < 0) {
 		RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n",
 				pci_addr);
@@ -724,7 +804,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 		}
 
 		/* chk for io port region */
-		ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
+		ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i);
 		if (ret < 0) {
 			free(reg);
 			goto err_vfio_res;
@@ -935,7 +1015,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
 		return -1;
 	}
 
-	if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
+	if (pci_vfio_set_bus_master(dev, dev->intr_handle.vfio_dev_fd, false)) {
 		RTE_LOG(ERR, EAL, "  %s cannot unset bus mastering for PCI device!\n",
 				pci_addr);
 		return -1;
@@ -1013,14 +1093,21 @@ int
 pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
 		    struct rte_pci_ioport *p)
 {
+	uint64_t size, offset;
+
 	if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
 	    bar > VFIO_PCI_BAR5_REGION_INDEX) {
 		RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
 		return -1;
 	}
 
+	if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) {
+		RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar);
+		return -1;
+	}
+
 	p->dev = dev;
-	p->base = VFIO_GET_REGION_ADDR(bar);
+	p->base = offset;
 	return 0;
 }
 
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 06e004cd3..1d13bed75 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -51,6 +51,16 @@ TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver);
 
 struct rte_devargs;
 
+#define RTE_MAX_PCI_REGIONS	9
+
+/*
+ * Regions provided by e.g. VFIO.
+ */
+struct rte_pci_region {
+	uint64_t size;
+	uint64_t offset;
+};
+
 /**
  * A structure describing a PCI device.
  */
@@ -59,6 +69,8 @@ struct rte_pci_device {
 	struct rte_device device;           /**< Inherit core device */
 	struct rte_pci_addr addr;           /**< PCI location. */
 	struct rte_pci_id id;               /**< PCI ID. */
+	struct rte_pci_region region[RTE_MAX_PCI_REGIONS];
+					    /**< PCI regions. */
 	struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
 					    /**< PCI Memory Resource */
 	struct rte_intr_handle intr_handle; /**< Interrupt handle */
diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c
index 8a0b387ce..29b1a873a 100644
--- a/lib/librte_eal/linux/eal/eal.c
+++ b/lib/librte_eal/linux/eal/eal.c
@@ -1068,6 +1068,15 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+#ifdef VFIO_PRESENT
+	if (rte_eal_vfio_setup() < 0) {
+		rte_eal_init_alert("Cannot init VFIO");
+		rte_errno = EAGAIN;
+		rte_atomic32_clear(&run_once);
+		return -1;
+	}
+#endif
+
 	if (rte_bus_scan()) {
 		rte_eal_init_alert("Cannot scan the buses for devices");
 		rte_errno = ENODEV;
@@ -1130,14 +1139,6 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-#ifdef VFIO_PRESENT
-	if (rte_eal_vfio_setup() < 0) {
-		rte_eal_init_alert("Cannot init VFIO");
-		rte_errno = EAGAIN;
-		rte_atomic32_clear(&run_once);
-		return -1;
-	}
-#endif
 	/* in secondary processes, memory init may allocate additional fbarrays
 	 * not present in primary processes, so to avoid any potential issues,
 	 * initialize memzones first.
-- 
2.17.1


             reply	other threads:[~2019-07-03  5:47 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-03  5:45 Tiwei Bie [this message]
2019-07-03  7:02 ` David Marchand
2019-07-03  7:33   ` Tiwei Bie
2019-07-03  7:36     ` David Marchand
2019-07-03  7:56       ` Tiwei Bie
2019-07-03  8:01         ` David Marchand
2019-07-03  8:15           ` Tiwei Bie
2019-07-03  8:26             ` David Marchand
2019-07-03  8:59               ` Tiwei Bie
2019-07-03  9:10                 ` David Marchand
2019-07-03  9:25                   ` Tiwei Bie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190703054508.22824-1-tiwei.bie@intel.com \
    --to=tiwei.bie@intel.com \
    --cc=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).