From: Tiwei Bie <tiwei.bie@intel.com>
To: dev@dpdk.org
Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com,
bruce.richardson@intel.com, keith.wiles@intel.com,
david.marchand@redhat.com, alejandro.lucero@netronome.com,
cunming.liang@intel.com
Subject: [dpdk-dev] [RFC v2 2/5] bus/pci: avoid depending on private value in kernel source
Date: Mon, 15 Jul 2019 15:52:11 +0800 [thread overview]
Message-ID: <20190715075214.16616-3-tiwei.bie@intel.com> (raw)
In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com>
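The value 40 used in VFIO_GET_REGION_ADDR() is a private value
(VFIO_PCI_OFFSET_SHIFT) defined in the Linux kernel source [1]. It
is not part of the VFIO API, and we should not depend on it. Instead,
cache the size and offset that VFIO reports for each region in the
internal PCI device structure and use them for region accesses.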
[1] https://github.com/torvalds/linux/blob/6fbc7275c7a9/drivers/vfio/pci/vfio_pci_private.h#L19
Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
drivers/bus/pci/linux/pci.c | 4 +-
drivers/bus/pci/linux/pci_init.h | 4 +-
drivers/bus/pci/linux/pci_vfio.c | 176 ++++++++++++++++++++++++-------
drivers/bus/pci/private.h | 10 ++
4 files changed, 154 insertions(+), 40 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index dfab7b81b..00bfbb301 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -639,7 +639,7 @@ int rte_pci_read_config(const struct rte_pci_device *device,
return pci_uio_read_config(intr_handle, buf, len, offset);
#ifdef VFIO_PRESENT
case RTE_KDRV_VFIO:
- return pci_vfio_read_config(intr_handle, buf, len, offset);
+ return pci_vfio_read_config(device, buf, len, offset);
#endif
default:
rte_pci_device_name(&device->addr, devname,
@@ -663,7 +663,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
return pci_uio_write_config(intr_handle, buf, len, offset);
#ifdef VFIO_PRESENT
case RTE_KDRV_VFIO:
- return pci_vfio_write_config(intr_handle, buf, len, offset);
+ return pci_vfio_write_config(device, buf, len, offset);
#endif
default:
rte_pci_device_name(&device->addr, devname,
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index c2e603a37..c6542a8f9 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -64,9 +64,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
#endif
/* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_read_config(const struct rte_pci_device *dev,
void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_write_config(const struct rte_pci_device *dev,
const void *buf, size_t len, off_t offs);
int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index ee3123965..2dc4a9299 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -49,35 +49,82 @@ static struct rte_tailq_elem rte_vfio_tailq = {
};
EAL_REGISTER_TAILQ(rte_vfio_tailq)
+static int
+pci_vfio_get_region(const struct rte_pci_device *dev, int index,
+ uint64_t *size, uint64_t *offset)
+{
+ const struct rte_pci_device_internal *pdev =
+ RTE_PCI_DEVICE_INTERNAL_CONST(dev);
+
+ if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS)
+ return -1;
+
+ if (pdev->region[index].size == 0 && pdev->region[index].offset == 0)
+ return -1;
+
+ *size = pdev->region[index].size;
+ *offset = pdev->region[index].offset;
+
+ return 0;
+}
+
int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_read_config(const struct rte_pci_device *dev,
void *buf, size_t len, off_t offs)
{
- return pread64(intr_handle->vfio_dev_fd, buf, len,
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+ uint64_t size, offset;
+ int fd;
+
+ fd = dev->intr_handle.vfio_dev_fd;
+
+ if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ &size, &offset) != 0)
+ return -1;
+
+ if ((uint64_t)len + offs > size)
+ return -1;
+
+ return pread64(fd, buf, len, offset + offs);
}
int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_write_config(const struct rte_pci_device *dev,
const void *buf, size_t len, off_t offs)
{
- return pwrite64(intr_handle->vfio_dev_fd, buf, len,
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+ uint64_t size, offset;
+ int fd;
+
+ fd = dev->intr_handle.vfio_dev_fd;
+
+ if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ &size, &offset) != 0)
+ return -1;
+
+ if ((uint64_t)len + offs > size)
+ return -1;
+
+ return pwrite64(fd, buf, len, offset + offs);
}
/* get PCI BAR number where MSI-X interrupts are */
static int
-pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
+pci_vfio_get_msix_bar(const struct rte_pci_device *dev, int fd,
+ struct pci_msix_table *msix_table)
{
int ret;
uint32_t reg;
uint16_t flags;
uint8_t cap_id, cap_offset;
+ uint64_t size, offset;
+
+ if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ &size, &offset) != 0) {
+ RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+ return -1;
+ }
/* read PCI capability pointer from config space */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_CAPABILITY_LIST);
+ ret = pread64(fd, &reg, sizeof(reg), offset + PCI_CAPABILITY_LIST);
if (ret != sizeof(reg)) {
RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
"config space!\n");
@@ -90,9 +137,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
while (cap_offset) {
/* read PCI capability ID */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset);
+ ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset);
if (ret != sizeof(reg)) {
RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
"config space!\n");
@@ -105,8 +150,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
/* if we haven't reached MSI-X, check next capability */
if (cap_id != PCI_CAP_ID_MSIX) {
ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset);
+ offset + cap_offset);
if (ret != sizeof(reg)) {
RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
"config space!\n");
@@ -122,8 +166,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
else {
/* table offset resides in the next 4 bytes */
ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset + 4);
+ offset + cap_offset + 4);
if (ret != sizeof(reg)) {
RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
"space!\n");
@@ -131,8 +174,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
}
ret = pread64(fd, &flags, sizeof(flags),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset + 2);
+ offset + cap_offset + 2);
if (ret != sizeof(flags)) {
RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
"space!\n");
@@ -152,14 +194,19 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
/* set PCI bus mastering */
static int
-pci_vfio_set_bus_master(int dev_fd, bool op)
+pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op)
{
+ uint64_t size, offset;
uint16_t reg;
int ret;
- ret = pread64(dev_fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_COMMAND);
+ if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ &size, &offset) != 0) {
+ RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+ return -1;
+ }
+
+ ret = pread64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND);
if (ret != sizeof(reg)) {
RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
return -1;
@@ -171,10 +218,7 @@ pci_vfio_set_bus_master(int dev_fd, bool op)
else
reg &= ~(PCI_COMMAND_MASTER);
- ret = pwrite64(dev_fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_COMMAND);
-
+ ret = pwrite64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND);
if (ret != sizeof(reg)) {
RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
return -1;
@@ -405,14 +449,21 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev)
#endif
static int
-pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
+pci_vfio_is_ioport_bar(const struct rte_pci_device *dev,
+ int vfio_dev_fd, int bar_index)
{
+ uint64_t size, offset;
uint32_t ioport_bar;
int ret;
+ if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+ &size, &offset) != 0) {
+ RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n");
+ return -1;
+ }
+
ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
- + PCI_BASE_ADDRESS_0 + bar_index*4);
+ offset + PCI_BASE_ADDRESS_0 + bar_index*4);
if (ret != sizeof(ioport_bar)) {
RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
PCI_BASE_ADDRESS_0 + bar_index*4);
@@ -431,7 +482,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
}
/* set bus mastering for the device */
- if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+ if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) {
RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
return -1;
}
@@ -645,11 +696,40 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
return ret;
}
+static int
+pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd,
+ struct vfio_device_info *device_info)
+{
+ struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev);
+ struct vfio_region_info *reg = NULL;
+ int nb_maps, i, ret;
+
+ nb_maps = RTE_MIN((int)device_info->num_regions,
+ VFIO_PCI_CONFIG_REGION_INDEX + 1);
+
+ for (i = 0; i < nb_maps; i++) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+ if (ret < 0) {
+ RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n",
+ dev->name, errno, strerror(errno));
+ return -1;
+ }
+
+ pdev->region[i].size = reg->size;
+ pdev->region[i].offset = reg->offset;
+
+ free(reg);
+ }
+
+ return 0;
+}
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
+ struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev);
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+ struct vfio_region_info *reg = NULL;
char pci_addr[PATH_MAX] = {0};
int vfio_dev_fd;
struct rte_pci_addr *loc = &dev->addr;
@@ -690,11 +770,22 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
/* map BARs */
maps = vfio_res->maps;
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg,
+ VFIO_PCI_CONFIG_REGION_INDEX);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "%s cannot get device region info error %i (%s)\n",
+ dev->name, errno, strerror(errno));
+ goto err_vfio_res;
+ }
+ pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].size = reg->size;
+ pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].offset = reg->offset;
+ free(reg);
+
vfio_res->msix_table.bar_index = -1;
/* get MSI-X BAR, if any (we have to know where it is because we can't
* easily mmap it when using VFIO)
*/
- ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table);
+ ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
@@ -715,7 +806,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info *reg = NULL;
void *bar_addr;
ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
@@ -726,8 +816,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
goto err_vfio_res;
}
+ pdev->region[i].size = reg->size;
+ pdev->region[i].offset = reg->offset;
+
/* chk for io port region */
- ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
+ ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i);
if (ret < 0) {
free(reg);
goto err_vfio_res;
@@ -833,6 +926,10 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
if (ret)
return ret;
+ ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info);
+ if (ret)
+ return ret;
+
/* map BARs */
maps = vfio_res->maps;
@@ -938,7 +1035,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
return -1;
}
- if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
+ if (pci_vfio_set_bus_master(dev, dev->intr_handle.vfio_dev_fd, false)) {
RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",
pci_addr);
return -1;
@@ -1016,14 +1113,21 @@ int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
{
+ uint64_t size, offset;
+
if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
bar > VFIO_PCI_BAR5_REGION_INDEX) {
RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
return -1;
}
+ if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) {
+ RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar);
+ return -1;
+ }
+
p->dev = dev;
- p->base = VFIO_GET_REGION_ADDR(bar);
+ p->base = offset;
return 0;
}
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 3e2abd818..c09185b86 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -10,6 +10,8 @@
#include <rte_pci.h>
#include <rte_bus_pci.h>
+#define RTE_MAX_PCI_REGIONS 9
+
/*
* Convert struct rte_pci_device to struct rte_pci_device_internal
*/
@@ -25,8 +27,16 @@ struct rte_pci_device;
extern struct rte_pci_bus rte_pci_bus;
+struct rte_pci_region {
+ uint64_t size;
+ uint64_t offset;
+};
+
struct rte_pci_device_internal {
struct rte_pci_device device;
+
+ /* PCI regions provided by e.g. VFIO. */
+ struct rte_pci_region region[RTE_MAX_PCI_REGIONS];
};
/**
--
2.17.1
next prev parent reply other threads:[~2019-07-15 7:54 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-03 7:18 [dpdk-dev] [RFC 0/3] Add mdev (Mediated device) support in DPDK Tiwei Bie
2019-04-03 7:18 ` Tiwei Bie
2019-04-03 7:18 ` [dpdk-dev] [RFC 1/3] eal: add a helper for reading string from sysfs Tiwei Bie
2019-04-03 7:18 ` Tiwei Bie
2019-04-03 7:18 ` [dpdk-dev] [RFC 2/3] bus/mdev: add mdev bus support Tiwei Bie
2019-04-03 7:18 ` Tiwei Bie
2019-04-03 7:18 ` [dpdk-dev] [RFC 3/3] bus/pci: add mdev support Tiwei Bie
2019-04-03 7:18 ` Tiwei Bie
2019-04-03 14:13 ` Wiles, Keith
2019-04-03 14:13 ` Wiles, Keith
2019-04-04 4:19 ` Tiwei Bie
2019-04-04 4:19 ` Tiwei Bie
2019-04-08 8:44 ` [dpdk-dev] [RFC 0/3] Add mdev (Mediated device) support in DPDK Alejandro Lucero
2019-04-08 8:44 ` Alejandro Lucero
2019-04-08 9:36 ` Tiwei Bie
2019-04-08 9:36 ` Tiwei Bie
2019-04-10 10:02 ` Francois Ozog
2019-04-10 10:02 ` Francois Ozog
2023-07-03 23:54 ` Stephen Hemminger
2019-07-15 7:52 ` [dpdk-dev] [RFC v2 0/5] " Tiwei Bie
2019-07-15 7:52 ` [dpdk-dev] [RFC v2 1/5] bus/pci: introduce an internal representation of PCI device Tiwei Bie
2019-07-15 7:52 ` Tiwei Bie [this message]
2019-07-15 7:52 ` [dpdk-dev] [RFC v2 3/5] bus/pci: introduce helper for MMIO read and write Tiwei Bie
2019-07-15 7:52 ` [dpdk-dev] [RFC v2 4/5] eal: add a helper for reading string from sysfs Tiwei Bie
2019-07-15 7:52 ` [dpdk-dev] [RFC v2 5/5] bus/pci: add mdev support Tiwei Bie
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 0/6] Add mdev (Mediated device) support in DPDK Chenbo Xia
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 1/6] bus/pci: introduce an internal representation of PCI device Chenbo Xia
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 2/6] bus/pci: avoid depending on private value in kernel source Chenbo Xia
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 3/6] bus/pci: introduce helper for MMIO read and write Chenbo Xia
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 4/6] eal: add a helper for reading string from sysfs Chenbo Xia
2021-06-01 5:37 ` Stephen Hemminger
2021-06-08 5:47 ` Xia, Chenbo
2021-06-01 5:39 ` Stephen Hemminger
2021-06-08 5:48 ` Xia, Chenbo
2021-06-11 7:19 ` Thomas Monjalon
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 5/6] bus/pci: add mdev support Chenbo Xia
2021-06-01 3:06 ` [dpdk-dev] [RFC v3 6/6] bus/pci: add sparse mmap support for mediated PCI devices Chenbo Xia
2021-06-11 7:15 ` [dpdk-dev] [RFC v3 0/6] Add mdev (Mediated device) support in DPDK Thomas Monjalon
2021-06-15 2:49 ` Xia, Chenbo
2021-06-15 7:48 ` Thomas Monjalon
2021-06-15 10:44 ` Xia, Chenbo
2021-06-15 11:57 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190715075214.16616-3-tiwei.bie@intel.com \
--to=tiwei.bie@intel.com \
--cc=alejandro.lucero@netronome.com \
--cc=anatoly.burakov@intel.com \
--cc=bruce.richardson@intel.com \
--cc=cunming.liang@intel.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@intel.com \
--cc=keith.wiles@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).