From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 11A28A0471 for ; Mon, 15 Jul 2019 09:54:28 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C3DBB1B9AC; Mon, 15 Jul 2019 09:54:16 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id 1E8DF1B95C for ; Mon, 15 Jul 2019 09:54:12 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Jul 2019 00:54:12 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,493,1557212400"; d="scan'208";a="175025611" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.66]) by FMSMGA003.fm.intel.com with ESMTP; 15 Jul 2019 00:54:11 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com, bruce.richardson@intel.com, keith.wiles@intel.com, david.marchand@redhat.com, alejandro.lucero@netronome.com, cunming.liang@intel.com Date: Mon, 15 Jul 2019 15:52:11 +0800 Message-Id: <20190715075214.16616-3-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com> References: <20190403071844.21126-1-tiwei.bie@intel.com> <20190715075214.16616-1-tiwei.bie@intel.com> Subject: [dpdk-dev] [RFC v2 2/5] bus/pci: avoid depending on private value in kernel source X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The value 40 used in VFIO_GET_REGION_ADDR() is a private value (VFIO_PCI_OFFSET_SHIFT) defined in the Linux kernel source [1]. It is not part of the VFIO API, and we should not depend on it. 
[1] https://github.com/torvalds/linux/blob/6fbc7275c7a9/drivers/vfio/pci/vfio_pci_private.h#L19 Signed-off-by: Tiwei Bie --- drivers/bus/pci/linux/pci.c | 4 +- drivers/bus/pci/linux/pci_init.h | 4 +- drivers/bus/pci/linux/pci_vfio.c | 176 ++++++++++++++++++++++++------- drivers/bus/pci/private.h | 10 ++ 4 files changed, 154 insertions(+), 40 deletions(-) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index dfab7b81b..00bfbb301 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -639,7 +639,7 @@ int rte_pci_read_config(const struct rte_pci_device *device, return pci_uio_read_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - return pci_vfio_read_config(intr_handle, buf, len, offset); + return pci_vfio_read_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, @@ -663,7 +663,7 @@ int rte_pci_write_config(const struct rte_pci_device *device, return pci_uio_write_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - return pci_vfio_write_config(intr_handle, buf, len, offset); + return pci_vfio_write_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index c2e603a37..c6542a8f9 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -64,9 +64,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p); #endif /* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs); int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, diff --git a/drivers/bus/pci/linux/pci_vfio.c 
b/drivers/bus/pci/linux/pci_vfio.c index ee3123965..2dc4a9299 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -49,35 +49,82 @@ static struct rte_tailq_elem rte_vfio_tailq = { }; EAL_REGISTER_TAILQ(rte_vfio_tailq) +static int +pci_vfio_get_region(const struct rte_pci_device *dev, int index, + uint64_t *size, uint64_t *offset) +{ + const struct rte_pci_device_internal *pdev = + RTE_PCI_DEVICE_INTERNAL_CONST(dev); + + if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS) + return -1; + + if (pdev->region[index].size == 0 && pdev->region[index].offset == 0) + return -1; + + *size = pdev->region[index].size; + *offset = pdev->region[index].offset; + + return 0; +} + int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs) { - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + uint64_t size, offset; + int fd; + + fd = dev->intr_handle.vfio_dev_fd; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pread64(fd, buf, len, offset + offs); } int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs) { - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + uint64_t size, offset; + int fd; + + fd = dev->intr_handle.vfio_dev_fd; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pwrite64(fd, buf, len, offset + offs); } /* get PCI BAR number where MSI-X interrupts are */ static int -pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) +pci_vfio_get_msix_bar(const 
struct rte_pci_device *dev, int fd, + struct pci_msix_table *msix_table) { int ret; uint32_t reg; uint16_t flags; uint8_t cap_id, cap_offset; + uint64_t size, offset; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } /* read PCI capability pointer from config space */ - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); + ret = pread64(fd, &reg, sizeof(reg), offset + PCI_CAPABILITY_LIST); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " "config space!\n"); @@ -90,9 +137,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) while (cap_offset) { /* read PCI capability ID */ - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " "config space!\n"); @@ -105,8 +150,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* if we haven't reached MSI-X, check next capability */ if (cap_id != PCI_CAP_ID_MSIX) { ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " "config space!\n"); @@ -122,8 +166,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) else { /* table offset resides in the next 4 bytes */ ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); + offset + cap_offset + 4); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " "space!\n"); @@ -131,8 +174,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) } ret = pread64(fd, &flags, sizeof(flags), - 
VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); + offset + cap_offset + 2); if (ret != sizeof(flags)) { RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " "space!\n"); @@ -152,14 +194,19 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd, bool op) +pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op) { + uint64_t size, offset; uint16_t reg; int ret; - ret = pread64(dev_fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + + ret = pread64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); return -1; @@ -171,10 +218,7 @@ pci_vfio_set_bus_master(int dev_fd, bool op) else reg &= ~(PCI_COMMAND_MASTER); - ret = pwrite64(dev_fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - + ret = pwrite64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); return -1; @@ -405,14 +449,21 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev) #endif static int -pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) +pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, + int vfio_dev_fd, int bar_index) { + uint64_t size, offset; uint32_t ioport_bar; int ret; + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + bar_index*4); + offset + PCI_BASE_ADDRESS_0 + bar_index*4); if 
(ret != sizeof(ioport_bar)) { RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n", PCI_BASE_ADDRESS_0 + bar_index*4); @@ -431,7 +482,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); return -1; } @@ -645,11 +696,40 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) return ret; } +static int +pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd, + struct vfio_device_info *device_info) +{ + struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev); + struct vfio_region_info *reg = NULL; + int nb_maps, i, ret; + + nb_maps = RTE_MIN((int)device_info->num_regions, + VFIO_PCI_CONFIG_REGION_INDEX + 1); + + for (i = 0; i < nb_maps; i++) { + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + return -1; + } + + pdev->region[i].size = reg->size; + pdev->region[i].offset = reg->offset; + + free(reg); + } + + return 0; +} static int pci_vfio_map_resource_primary(struct rte_pci_device *dev) { + struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev); struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + struct vfio_region_info *reg = NULL; char pci_addr[PATH_MAX] = {0}; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; @@ -690,11 +770,22 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) /* map BARs */ maps = vfio_res->maps; + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, + VFIO_PCI_CONFIG_REGION_INDEX); + if (ret < 0) { + RTE_LOG(ERR, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + goto err_vfio_res; + } + pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].size = reg->size; + 
pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].offset = reg->offset; + free(reg); + vfio_res->msix_table.bar_index = -1; /* get MSI-X BAR, if any (we have to know where it is because we can't * easily mmap it when using VFIO) */ - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); + ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table); if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); @@ -715,7 +806,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) } for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info *reg = NULL; void *bar_addr; ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); @@ -726,8 +816,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_vfio_res; } + pdev->region[i].size = reg->size; + pdev->region[i].offset = reg->offset; + /* chk for io port region */ - ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); + ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i); if (ret < 0) { free(reg); goto err_vfio_res; @@ -833,6 +926,10 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) if (ret) return ret; + ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info); + if (ret) + return ret; + /* map BARs */ maps = vfio_res->maps; @@ -938,7 +1035,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) return -1; } - if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + if (pci_vfio_set_bus_master(dev, dev->intr_handle.vfio_dev_fd, false)) { RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", pci_addr); return -1; @@ -1016,14 +1113,21 @@ int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { + uint64_t size, offset; + if (bar < VFIO_PCI_BAR0_REGION_INDEX || bar > VFIO_PCI_BAR5_REGION_INDEX) { RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); return -1; } + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar); + 
return -1; + } + p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); + p->base = offset; return 0; } diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index 3e2abd818..c09185b86 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -10,6 +10,8 @@ #include #include +#define RTE_MAX_PCI_REGIONS 9 + /* * Convert struct rte_pci_device to struct rte_pci_device_internal */ @@ -25,8 +27,16 @@ struct rte_pci_device; extern struct rte_pci_bus rte_pci_bus; +struct rte_pci_region { + uint64_t size; + uint64_t offset; +}; + struct rte_pci_device_internal { struct rte_pci_device device; + + /* PCI regions provided by e.g. VFIO. */ + struct rte_pci_region region[RTE_MAX_PCI_REGIONS]; }; /** -- 2.17.1