* [dpdk-dev] [PATCH 18.11] pci/vfio: allow mapping MSI-X BARs if kernel allows it
@ 2018-07-30 11:17 Anatoly Burakov
2018-07-31 9:38 ` Takeshi Yoshimura
2018-07-31 11:28 ` [dpdk-dev] [PATCH 18.11 v2] " Anatoly Burakov
0 siblings, 2 replies; 8+ messages in thread
From: Anatoly Burakov @ 2018-07-30 11:17 UTC (permalink / raw)
To: dev; +Cc: jerin.jacob, thomas, t.yoshimura8869
Currently, DPDK will skip mapping some areas (or even an entire BAR)
if MSI-X happens to be in it but is smaller than page address.
Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
as a capability flag. Capability flags themselves are also only
supported since kernel 4.6 [2].
This commit will introduce support for checking VFIO capabilities,
and will use it to check if we are allowed to map BARs with MSI-X
tables in them, along with backwards compatibility for older
kernels, including a workaround for a variable rename in VFIO
region info structure [3].
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
[3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
drivers/bus/pci/linux/pci_vfio.c | 127 ++++++++++++++++++++---
lib/librte_eal/common/include/rte_vfio.h | 26 +++++
2 files changed, 140 insertions(+), 13 deletions(-)
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 686386d6a..e7765ee11 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -415,6 +415,88 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
return 0;
}
+/*
+ * region info may contain capability headers, so we need to keep reallocating
+ * the memory until we match allocated memory size with argsz.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+ int region)
+{
+ struct vfio_region_info *ri;
+ size_t argsz = sizeof(*ri);
+ int ret;
+
+ ri = malloc(sizeof(*ri));
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+ return -1;
+ }
+again:
+ memset(ri, 0, argsz);
+ ri->argsz = argsz;
+ ri->index = region;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, info);
+ if (ret) {
+ free(ri);
+ return ret;
+ }
+ if (ri->argsz != argsz) {
+ argsz = ri->argsz;
+ ri = realloc(ri, argsz);
+
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+ return -1;
+ }
+ goto again;
+ }
+ *info = ri;
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+ struct vfio_info_cap_header *h;
+ size_t offset;
+
+ if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
+ /* VFIO info does not advertise capabilities */
+ return NULL;
+ }
+
+ offset = VFIO_CAP_OFFSET(info);
+ while (offset != 0) {
+ h = RTE_PTR_ADD(info, offset);
+ if (h->id == cap)
+ return h;
+ offset = h->next;
+ }
+ return NULL;
+}
+
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+ struct vfio_region_info *info;
+ int ret;
+
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
+ if (ret < 0)
+ return -1;
+
+ ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
+
+ /* cleanup */
+ free(info);
+
+ return ret;
+}
+
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
@@ -464,56 +546,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
- goto err_vfio_dev_fd;
+ goto err_vfio_res;
+ }
+ /* if we found our MSI-X BAR region, check if we can mmap it */
+ if (vfio_res->msix_table.bar_index != -1) {
+ int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+ vfio_res->msix_table.bar_index);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+ goto err_vfio_res;
+ } else if (ret != 0) {
+ /* we can map it, so we don't care where it is */
+ RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+ vfio_res->msix_table.bar_index = -1;
+ }
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ struct vfio_region_info *reg;
void *bar_addr;
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
- if (ret) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+ if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ "error %i (%s)\n", pci_addr, errno,
+ strerror(errno));
goto err_vfio_res;
}
/* chk for io port region */
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
- if (ret < 0)
+ if (ret < 0) {
+ free(reg);
goto err_vfio_res;
- else if (ret) {
+ } else if (ret) {
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
i);
+ free(reg);
continue;
}
/* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+ free(reg);
continue;
+ }
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
+ maps[i].offset = reg->offset;
+ maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
pci_addr, i, strerror(errno));
+ free(reg);
goto err_vfio_res;
}
dev->mem_resource[i].addr = maps[i].addr;
+
+ free(reg);
}
if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index 5ca13fcce..f6617e004 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -14,6 +14,8 @@
extern "C" {
#endif
+#include <stdint.h>
+
/*
* determine if VFIO is present on the system
*/
@@ -44,6 +46,30 @@ extern "C" {
#define RTE_VFIO_NOIOMMU 8
#endif
+/*
+ * capabilities are only supported on kernel 4.6+. there were also some API
+ * changes as well, so add a macro to get cap offset.
+ */
+#ifdef VFIO_REGION_INFO_FLAG_CAPS
+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
+#define VFIO_CAP_OFFSET(x) (x->cap_offset)
+#else
+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
+#define VFIO_CAP_OFFSET(x) (x->resv)
+struct vfio_info_cap_header {
+ uint16_t id;
+ uint16_t version;
+ uint32_t next;
+};
+#endif
+
+/* kernels 4.16+ can map BAR containing MSI-X table */
+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#else
+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
+#endif
+
#else /* not VFIO_PRESENT */
/* we don't need an actual definition, only pointer is used */
--
2.17.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [dpdk-dev] [PATCH 18.11] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-07-30 11:17 [dpdk-dev] [PATCH 18.11] pci/vfio: allow mapping MSI-X BARs if kernel allows it Anatoly Burakov
@ 2018-07-31 9:38 ` Takeshi Yoshimura
2018-07-31 11:24 ` Burakov, Anatoly
2018-07-31 11:28 ` [dpdk-dev] [PATCH 18.11 v2] " Anatoly Burakov
1 sibling, 1 reply; 8+ messages in thread
From: Takeshi Yoshimura @ 2018-07-31 9:38 UTC (permalink / raw)
To: Anatoly Burakov; +Cc: dev, Jerin Jacob, thomas
2018-07-30 20:17 GMT+09:00 Anatoly Burakov <anatoly.burakov@intel.com>:
> Currently, DPDK will skip mapping some areas (or even an entire BAR)
> if MSI-X happens to be in it but is smaller than page address.
>
> Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
> as a capability flag. Capability flags themselves are also only
> supported since kernel 4.6 [2].
>
> This commit will introduce support for checking VFIO capabilities,
> and will use it to check if we are allowed to map BARs with MSI-X
> tables in them, along with backwards compatibility for older
> kernels, including a workaround for a variable rename in VFIO
> region info structure [3].
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
>
> [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
>
> [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> drivers/bus/pci/linux/pci_vfio.c | 127 ++++++++++++++++++++---
> lib/librte_eal/common/include/rte_vfio.h | 26 +++++
> 2 files changed, 140 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
> index 686386d6a..e7765ee11 100644
> --- a/drivers/bus/pci/linux/pci_vfio.c
> +++ b/drivers/bus/pci/linux/pci_vfio.c
> @@ -415,6 +415,88 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
> return 0;
> }
>
> +/*
> + * region info may contain capability headers, so we need to keep reallocating
> + * the memory until we match allocated memory size with argsz.
> + */
> +static int
> +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
> + int region)
> +{
> + struct vfio_region_info *ri;
> + size_t argsz = sizeof(*ri);
> + int ret;
> +
> + ri = malloc(sizeof(*ri));
> + if (ri == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
> + return -1;
> + }
> +again:
> + memset(ri, 0, argsz);
> + ri->argsz = argsz;
> + ri->index = region;
> +
> + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, info);
> + if (ret) {
> + free(ri);
> + return ret;
> + }
> + if (ri->argsz != argsz) {
> + argsz = ri->argsz;
> + ri = realloc(ri, argsz);
> +
> + if (ri == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
> + return -1;
> + }
> + goto again;
> + }
> + *info = ri;
> +
> + return 0;
> +}
> +
> +static struct vfio_info_cap_header *
> +pci_vfio_info_cap(struct vfio_region_info *info, int cap)
> +{
> + struct vfio_info_cap_header *h;
> + size_t offset;
> +
> + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
> + /* VFIO info does not advertise capabilities */
> + return NULL;
> + }
> +
> + offset = VFIO_CAP_OFFSET(info);
> + while (offset != 0) {
> + h = RTE_PTR_ADD(info, offset);
> + if (h->id == cap)
> + return h;
> + offset = h->next;
> + }
> + return NULL;
> +}
> +
> +static int
> +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
> +{
> + struct vfio_region_info *info;
> + int ret;
> +
> + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
> + if (ret < 0)
> + return -1;
> +
> + ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
> +
> + /* cleanup */
> + free(info);
> +
> + return ret;
> +}
> +
> +
> static int
> pci_vfio_map_resource_primary(struct rte_pci_device *dev)
> {
> @@ -464,56 +546,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
> if (ret < 0) {
> RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
> pci_addr);
> - goto err_vfio_dev_fd;
> + goto err_vfio_res;
> + }
> + /* if we found our MSI-X BAR region, check if we can mmap it */
> + if (vfio_res->msix_table.bar_index != -1) {
> + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
> + vfio_res->msix_table.bar_index);
> + if (ret < 0) {
> + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
> + goto err_vfio_res;
> + } else if (ret != 0) {
> + /* we can map it, so we don't care where it is */
> + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
> + vfio_res->msix_table.bar_index = -1;
> + }
> }
>
> for (i = 0; i < (int) vfio_res->nb_maps; i++) {
> - struct vfio_region_info reg = { .argsz = sizeof(reg) };
> + struct vfio_region_info *reg;
> void *bar_addr;
>
> - reg.index = i;
> -
> - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
> - if (ret) {
> + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
> + if (ret < 0) {
> RTE_LOG(ERR, EAL, " %s cannot get device region info "
> - "error %i (%s)\n", pci_addr, errno, strerror(errno));
> + "error %i (%s)\n", pci_addr, errno,
> + strerror(errno));
> goto err_vfio_res;
> }
>
> /* chk for io port region */
> ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
> - if (ret < 0)
> + if (ret < 0) {
> + free(reg);
> goto err_vfio_res;
> - else if (ret) {
> + } else if (ret) {
> RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
> i);
> + free(reg);
> continue;
> }
>
> /* skip non-mmapable BARs */
> - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
> + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
> + free(reg);
> continue;
> + }
>
> /* try mapping somewhere close to the end of hugepages */
> if (pci_map_addr == NULL)
> pci_map_addr = pci_find_max_end_va();
>
> bar_addr = pci_map_addr;
> - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
> + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
>
> maps[i].addr = bar_addr;
> - maps[i].offset = reg.offset;
> - maps[i].size = reg.size;
> + maps[i].offset = reg->offset;
> + maps[i].size = reg->size;
> maps[i].path = NULL; /* vfio doesn't have per-resource paths */
>
> ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
> if (ret < 0) {
> RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
> pci_addr, i, strerror(errno));
> + free(reg);
> goto err_vfio_res;
> }
>
> dev->mem_resource[i].addr = maps[i].addr;
> +
> + free(reg);
> }
>
> if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
> diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
> index 5ca13fcce..f6617e004 100644
> --- a/lib/librte_eal/common/include/rte_vfio.h
> +++ b/lib/librte_eal/common/include/rte_vfio.h
> @@ -14,6 +14,8 @@
> extern "C" {
> #endif
>
> +#include <stdint.h>
> +
> /*
> * determine if VFIO is present on the system
> */
> @@ -44,6 +46,30 @@ extern "C" {
> #define RTE_VFIO_NOIOMMU 8
> #endif
>
> +/*
> + * capabilities are only supported on kernel 4.6+. there were also some API
> + * changes as well, so add a macro to get cap offset.
> + */
> +#ifdef VFIO_REGION_INFO_FLAG_CAPS
> +#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
> +#define VFIO_CAP_OFFSET(x) (x->cap_offset)
> +#else
> +#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
> +#define VFIO_CAP_OFFSET(x) (x->resv)
> +struct vfio_info_cap_header {
> + uint16_t id;
> + uint16_t version;
> + uint32_t next;
> +};
> +#endif
> +
> +/* kernels 4.16+ can map BAR containing MSI-X table */
> +#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
> +#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
> +#else
> +#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
> +#endif
> +
> #else /* not VFIO_PRESENT */
>
> /* we don't need an actual definition, only pointer is used */
> --
> 2.17.1
Hi Anatoly,
I have tested the patch on our ppc64le machine, but the
ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, info) in
pci_vfio_get_region_info() failed.
This may be an issue of ppc64le VFIO implementation. Let me investigate more...
Thanks,
Takeshi
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [dpdk-dev] [PATCH 18.11] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-07-31 9:38 ` Takeshi Yoshimura
@ 2018-07-31 11:24 ` Burakov, Anatoly
0 siblings, 0 replies; 8+ messages in thread
From: Burakov, Anatoly @ 2018-07-31 11:24 UTC (permalink / raw)
To: Takeshi Yoshimura; +Cc: dev, Jerin Jacob, thomas
On 31-Jul-18 10:38 AM, Takeshi Yoshimura wrote:
> 2018-07-30 20:17 GMT+09:00 Anatoly Burakov <anatoly.burakov@intel.com>:
>> Currently, DPDK will skip mapping some areas (or even an entire BAR)
>> if MSI-X happens to be in it but is smaller than page address.
>>
>> Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
>> as a capability flag. Capability flags themselves are also only
>> supported since kernel 4.6 [2].
>>
>> This commit will introduce support for checking VFIO capabilities,
>> and will use it to check if we are allowed to map BARs with MSI-X
>> tables in them, along with backwards compatibility for older
>> kernels, including a workaround for a variable rename in VFIO
>> region info structure [3].
>>
>> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>> linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
>>
>> [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>> linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
>>
>> [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>> linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>> drivers/bus/pci/linux/pci_vfio.c | 127 ++++++++++++++++++++---
>> lib/librte_eal/common/include/rte_vfio.h | 26 +++++
>> 2 files changed, 140 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
>> index 686386d6a..e7765ee11 100644
>> --- a/drivers/bus/pci/linux/pci_vfio.c
>> +++ b/drivers/bus/pci/linux/pci_vfio.c
>> @@ -415,6 +415,88 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
>> return 0;
>> }
>>
>> +/*
>> + * region info may contain capability headers, so we need to keep reallocating
>> + * the memory until we match allocated memory size with argsz.
>> + */
>> +static int
>> +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
>> + int region)
>> +{
>> + struct vfio_region_info *ri;
>> + size_t argsz = sizeof(*ri);
>> + int ret;
>> +
>> + ri = malloc(sizeof(*ri));
>> + if (ri == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
>> + return -1;
>> + }
>> +again:
>> + memset(ri, 0, argsz);
>> + ri->argsz = argsz;
>> + ri->index = region;
>> +
>> + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, info);
>> + if (ret) {
>> + free(ri);
>> + return ret;
>> + }
>> + if (ri->argsz != argsz) {
>> + argsz = ri->argsz;
>> + ri = realloc(ri, argsz);
>> +
>> + if (ri == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
>> + return -1;
>> + }
>> + goto again;
>> + }
>> + *info = ri;
>> +
>> + return 0;
>> +}
>> +
>> +static struct vfio_info_cap_header *
>> +pci_vfio_info_cap(struct vfio_region_info *info, int cap)
>> +{
>> + struct vfio_info_cap_header *h;
>> + size_t offset;
>> +
>> + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
>> + /* VFIO info does not advertise capabilities */
>> + return NULL;
>> + }
>> +
>> + offset = VFIO_CAP_OFFSET(info);
>> + while (offset != 0) {
>> + h = RTE_PTR_ADD(info, offset);
>> + if (h->id == cap)
>> + return h;
>> + offset = h->next;
>> + }
>> + return NULL;
>> +}
>> +
>> +static int
>> +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
>> +{
>> + struct vfio_region_info *info;
>> + int ret;
>> +
>> + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
>> + if (ret < 0)
>> + return -1;
>> +
>> + ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
>> +
>> + /* cleanup */
>> + free(info);
>> +
>> + return ret;
>> +}
>> +
>> +
>> static int
>> pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>> {
>> @@ -464,56 +546,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>> if (ret < 0) {
>> RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
>> pci_addr);
>> - goto err_vfio_dev_fd;
>> + goto err_vfio_res;
>> + }
>> + /* if we found our MSI-X BAR region, check if we can mmap it */
>> + if (vfio_res->msix_table.bar_index != -1) {
>> + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
>> + vfio_res->msix_table.bar_index);
>> + if (ret < 0) {
>> + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
>> + goto err_vfio_res;
>> + } else if (ret != 0) {
>> + /* we can map it, so we don't care where it is */
>> + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
>> + vfio_res->msix_table.bar_index = -1;
>> + }
>> }
>>
>> for (i = 0; i < (int) vfio_res->nb_maps; i++) {
>> - struct vfio_region_info reg = { .argsz = sizeof(reg) };
>> + struct vfio_region_info *reg;
>> void *bar_addr;
>>
>> - reg.index = i;
>> -
>> - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
>> - if (ret) {
>> + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
>> + if (ret < 0) {
>> RTE_LOG(ERR, EAL, " %s cannot get device region info "
>> - "error %i (%s)\n", pci_addr, errno, strerror(errno));
>> + "error %i (%s)\n", pci_addr, errno,
>> + strerror(errno));
>> goto err_vfio_res;
>> }
>>
>> /* chk for io port region */
>> ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
>> - if (ret < 0)
>> + if (ret < 0) {
>> + free(reg);
>> goto err_vfio_res;
>> - else if (ret) {
>> + } else if (ret) {
>> RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
>> i);
>> + free(reg);
>> continue;
>> }
>>
>> /* skip non-mmapable BARs */
>> - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
>> + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
>> + free(reg);
>> continue;
>> + }
>>
>> /* try mapping somewhere close to the end of hugepages */
>> if (pci_map_addr == NULL)
>> pci_map_addr = pci_find_max_end_va();
>>
>> bar_addr = pci_map_addr;
>> - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
>> + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
>>
>> maps[i].addr = bar_addr;
>> - maps[i].offset = reg.offset;
>> - maps[i].size = reg.size;
>> + maps[i].offset = reg->offset;
>> + maps[i].size = reg->size;
>> maps[i].path = NULL; /* vfio doesn't have per-resource paths */
>>
>> ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
>> if (ret < 0) {
>> RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
>> pci_addr, i, strerror(errno));
>> + free(reg);
>> goto err_vfio_res;
>> }
>>
>> dev->mem_resource[i].addr = maps[i].addr;
>> +
>> + free(reg);
>> }
>>
>> if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
>> diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
>> index 5ca13fcce..f6617e004 100644
>> --- a/lib/librte_eal/common/include/rte_vfio.h
>> +++ b/lib/librte_eal/common/include/rte_vfio.h
>> @@ -14,6 +14,8 @@
>> extern "C" {
>> #endif
>>
>> +#include <stdint.h>
>> +
>> /*
>> * determine if VFIO is present on the system
>> */
>> @@ -44,6 +46,30 @@ extern "C" {
>> #define RTE_VFIO_NOIOMMU 8
>> #endif
>>
>> +/*
>> + * capabilities are only supported on kernel 4.6+. there were also some API
>> + * changes as well, so add a macro to get cap offset.
>> + */
>> +#ifdef VFIO_REGION_INFO_FLAG_CAPS
>> +#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
>> +#define VFIO_CAP_OFFSET(x) (x->cap_offset)
>> +#else
>> +#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
>> +#define VFIO_CAP_OFFSET(x) (x->resv)
>> +struct vfio_info_cap_header {
>> + uint16_t id;
>> + uint16_t version;
>> + uint32_t next;
>> +};
>> +#endif
>> +
>> +/* kernels 4.16+ can map BAR containing MSI-X table */
>> +#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
>> +#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
>> +#else
>> +#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
>> +#endif
>> +
>> #else /* not VFIO_PRESENT */
>>
>> /* we don't need an actual definition, only pointer is used */
>> --
>> 2.17.1
>
> Hi Anatoly,
> I have tested the patch on our ppc64le machine, but the
> ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, info) in
> pci_vfio_get_region_info() failed.
> This may be an issue of ppc64le VFIO implementation. Let me investigate more...
>
> Thanks,
> Takeshi
>
Hi Takeshi, i think there's a bug in my patch. I'll submit a v2.
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 8+ messages in thread
* [dpdk-dev] [PATCH 18.11 v2] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-07-30 11:17 [dpdk-dev] [PATCH 18.11] pci/vfio: allow mapping MSI-X BARs if kernel allows it Anatoly Burakov
2018-07-31 9:38 ` Takeshi Yoshimura
@ 2018-07-31 11:28 ` Anatoly Burakov
2018-08-02 6:47 ` Takeshi Yoshimura
2018-09-20 13:11 ` [dpdk-dev] [PATCH v3] " Anatoly Burakov
1 sibling, 2 replies; 8+ messages in thread
From: Anatoly Burakov @ 2018-07-31 11:28 UTC (permalink / raw)
To: dev; +Cc: jerin.jacob, thomas, t.yoshimura8869
Currently, DPDK will skip mapping some areas (or even an entire BAR)
if MSI-X table happens to be in them but is smaller than page size.
Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
as a capability flag. Capability flags themselves are also only
supported since kernel 4.6 [2].
This commit will introduce support for checking VFIO capabilities,
and will use it to check if we are allowed to map BARs with MSI-X
tables in them, along with backwards compatibility for older
kernels, including a workaround for a variable rename in VFIO
region info structure [3].
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
[3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
Notes:
v2->v1:
- Fix pointer in pci_vfio_get_region_info
- Fix commit message
drivers/bus/pci/linux/pci_vfio.c | 127 ++++++++++++++++++++---
lib/librte_eal/common/include/rte_vfio.h | 26 +++++
2 files changed, 140 insertions(+), 13 deletions(-)
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 686386d6a..24f665c20 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -415,6 +415,88 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
return 0;
}
+/*
+ * region info may contain capability headers, so we need to keep reallocating
+ * the memory until we match allocated memory size with argsz.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+ int region)
+{
+ struct vfio_region_info *ri;
+ size_t argsz = sizeof(*ri);
+ int ret;
+
+ ri = malloc(sizeof(*ri));
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+ return -1;
+ }
+again:
+ memset(ri, 0, argsz);
+ ri->argsz = argsz;
+ ri->index = region;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
+ if (ret) {
+ free(ri);
+ return ret;
+ }
+ if (ri->argsz != argsz) {
+ argsz = ri->argsz;
+ ri = realloc(ri, argsz);
+
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+ return -1;
+ }
+ goto again;
+ }
+ *info = ri;
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+ struct vfio_info_cap_header *h;
+ size_t offset;
+
+ if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
+ /* VFIO info does not advertise capabilities */
+ return NULL;
+ }
+
+ offset = VFIO_CAP_OFFSET(info);
+ while (offset != 0) {
+ h = RTE_PTR_ADD(info, offset);
+ if (h->id == cap)
+ return h;
+ offset = h->next;
+ }
+ return NULL;
+}
+
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+ struct vfio_region_info *info;
+ int ret;
+
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
+ if (ret < 0)
+ return -1;
+
+ ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
+
+ /* cleanup */
+ free(info);
+
+ return ret;
+}
+
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
@@ -464,56 +546,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
- goto err_vfio_dev_fd;
+ goto err_vfio_res;
+ }
+ /* if we found our MSI-X BAR region, check if we can mmap it */
+ if (vfio_res->msix_table.bar_index != -1) {
+ int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+ vfio_res->msix_table.bar_index);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+ goto err_vfio_res;
+ } else if (ret != 0) {
+ /* we can map it, so we don't care where it is */
+ RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+ vfio_res->msix_table.bar_index = -1;
+ }
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ struct vfio_region_info *reg;
void *bar_addr;
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
- if (ret) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+ if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ "error %i (%s)\n", pci_addr, errno,
+ strerror(errno));
goto err_vfio_res;
}
/* chk for io port region */
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
- if (ret < 0)
+ if (ret < 0) {
+ free(reg);
goto err_vfio_res;
- else if (ret) {
+ } else if (ret) {
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
i);
+ free(reg);
continue;
}
/* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+ free(reg);
continue;
+ }
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
+ maps[i].offset = reg->offset;
+ maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
pci_addr, i, strerror(errno));
+ free(reg);
goto err_vfio_res;
}
dev->mem_resource[i].addr = maps[i].addr;
+
+ free(reg);
}
if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index 5ca13fcce..f6617e004 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -14,6 +14,8 @@
extern "C" {
#endif
+#include <stdint.h>
+
/*
* determine if VFIO is present on the system
*/
@@ -44,6 +46,30 @@ extern "C" {
#define RTE_VFIO_NOIOMMU 8
#endif
+/*
+ * capabilities are only supported on kernel 4.6+. there were also some API
+ * changes as well, so add a macro to get cap offset.
+ */
+#ifdef VFIO_REGION_INFO_FLAG_CAPS
+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
+#define VFIO_CAP_OFFSET(x) (x->cap_offset)
+#else
+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
+#define VFIO_CAP_OFFSET(x) (x->resv)
+struct vfio_info_cap_header {
+ uint16_t id;
+ uint16_t version;
+ uint32_t next;
+};
+#endif
+
+/* kernels 4.16+ can map BAR containing MSI-X table */
+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#else
+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
+#endif
+
#else /* not VFIO_PRESENT */
/* we don't need an actual definition, only pointer is used */
--
2.17.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [dpdk-dev] [PATCH 18.11 v2] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-07-31 11:28 ` [dpdk-dev] [PATCH 18.11 v2] " Anatoly Burakov
@ 2018-08-02 6:47 ` Takeshi Yoshimura
2018-08-02 8:17 ` Burakov, Anatoly
2018-09-20 13:11 ` [dpdk-dev] [PATCH v3] " Anatoly Burakov
1 sibling, 1 reply; 8+ messages in thread
From: Takeshi Yoshimura @ 2018-08-02 6:47 UTC (permalink / raw)
To: Anatoly Burakov; +Cc: dev, Jerin Jacob, thomas
2018-07-31 20:28 GMT+09:00 Anatoly Burakov <anatoly.burakov@intel.com>:
> Currently, DPDK will skip mapping some areas (or even an entire BAR)
> if MSI-X table happens to be in them but is smaller than page size.
>
> Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
> as a capability flag. Capability flags themselves are also only
> supported since kernel 4.6 [2].
>
> This commit will introduce support for checking VFIO capabilities,
> and will use it to check if we are allowed to map BARs with MSI-X
> tables in them, along with backwards compatibility for older
> kernels, including a workaround for a variable rename in VFIO
> region info structure [3].
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
>
> [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
>
> [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>
> Notes:
> v2->v1:
> - Fix pointer in pci_vfio_get_region_info
> - Fix commit message
>
> drivers/bus/pci/linux/pci_vfio.c | 127 ++++++++++++++++++++---
> lib/librte_eal/common/include/rte_vfio.h | 26 +++++
> 2 files changed, 140 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
> index 686386d6a..24f665c20 100644
> --- a/drivers/bus/pci/linux/pci_vfio.c
> +++ b/drivers/bus/pci/linux/pci_vfio.c
> @@ -415,6 +415,88 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
> return 0;
> }
>
> +/*
> + * region info may contain capability headers, so we need to keep reallocating
> + * the memory until we match allocated memory size with argsz.
> + */
> +static int
> +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
> + int region)
> +{
> + struct vfio_region_info *ri;
> + size_t argsz = sizeof(*ri);
> + int ret;
> +
> + ri = malloc(sizeof(*ri));
> + if (ri == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
> + return -1;
> + }
> +again:
> + memset(ri, 0, argsz);
> + ri->argsz = argsz;
> + ri->index = region;
> +
> + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
> + if (ret) {
> + free(ri);
> + return ret;
> + }
> + if (ri->argsz != argsz) {
> + argsz = ri->argsz;
> + ri = realloc(ri, argsz);
> +
> + if (ri == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
> + return -1;
> + }
> + goto again;
> + }
> + *info = ri;
> +
> + return 0;
> +}
> +
> +static struct vfio_info_cap_header *
> +pci_vfio_info_cap(struct vfio_region_info *info, int cap)
> +{
> + struct vfio_info_cap_header *h;
> + size_t offset;
> +
> + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
> + /* VFIO info does not advertise capabilities */
> + return NULL;
> + }
> +
> + offset = VFIO_CAP_OFFSET(info);
> + while (offset != 0) {
> + h = RTE_PTR_ADD(info, offset);
> + if (h->id == cap)
> + return h;
> + offset = h->next;
> + }
> + return NULL;
> +}
> +
> +static int
> +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
> +{
> + struct vfio_region_info *info;
> + int ret;
> +
> + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
> + if (ret < 0)
> + return -1;
> +
> + ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
> +
> + /* cleanup */
> + free(info);
> +
> + return ret;
> +}
> +
> +
> static int
> pci_vfio_map_resource_primary(struct rte_pci_device *dev)
> {
> @@ -464,56 +546,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
> if (ret < 0) {
> RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
> pci_addr);
> - goto err_vfio_dev_fd;
> + goto err_vfio_res;
> + }
> + /* if we found our MSI-X BAR region, check if we can mmap it */
> + if (vfio_res->msix_table.bar_index != -1) {
> + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
> + vfio_res->msix_table.bar_index);
> + if (ret < 0) {
> + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
> + goto err_vfio_res;
> + } else if (ret != 0) {
> + /* we can map it, so we don't care where it is */
> + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
> + vfio_res->msix_table.bar_index = -1;
> + }
> }
>
> for (i = 0; i < (int) vfio_res->nb_maps; i++) {
> - struct vfio_region_info reg = { .argsz = sizeof(reg) };
> + struct vfio_region_info *reg;
> void *bar_addr;
>
> - reg.index = i;
> -
> - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
> - if (ret) {
> + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
> + if (ret < 0) {
> RTE_LOG(ERR, EAL, " %s cannot get device region info "
> - "error %i (%s)\n", pci_addr, errno, strerror(errno));
> + "error %i (%s)\n", pci_addr, errno,
> + strerror(errno));
> goto err_vfio_res;
> }
>
> /* chk for io port region */
> ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
> - if (ret < 0)
> + if (ret < 0) {
> + free(reg);
> goto err_vfio_res;
> - else if (ret) {
> + } else if (ret) {
> RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
> i);
> + free(reg);
> continue;
> }
>
> /* skip non-mmapable BARs */
> - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
> + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
> + free(reg);
> continue;
> + }
>
> /* try mapping somewhere close to the end of hugepages */
> if (pci_map_addr == NULL)
> pci_map_addr = pci_find_max_end_va();
>
> bar_addr = pci_map_addr;
> - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
> + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
>
> maps[i].addr = bar_addr;
> - maps[i].offset = reg.offset;
> - maps[i].size = reg.size;
> + maps[i].offset = reg->offset;
> + maps[i].size = reg->size;
> maps[i].path = NULL; /* vfio doesn't have per-resource paths */
>
> ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
> if (ret < 0) {
> RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
> pci_addr, i, strerror(errno));
> + free(reg);
> goto err_vfio_res;
> }
>
> dev->mem_resource[i].addr = maps[i].addr;
> +
> + free(reg);
> }
>
> if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
> diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
> index 5ca13fcce..f6617e004 100644
> --- a/lib/librte_eal/common/include/rte_vfio.h
> +++ b/lib/librte_eal/common/include/rte_vfio.h
> @@ -14,6 +14,8 @@
> extern "C" {
> #endif
>
> +#include <stdint.h>
> +
> /*
> * determine if VFIO is present on the system
> */
> @@ -44,6 +46,30 @@ extern "C" {
> #define RTE_VFIO_NOIOMMU 8
> #endif
>
> +/*
> + * capabilities are only supported on kernel 4.6+. there were also some API
> + * changes as well, so add a macro to get cap offset.
> + */
> +#ifdef VFIO_REGION_INFO_FLAG_CAPS
> +#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
> +#define VFIO_CAP_OFFSET(x) (x->cap_offset)
> +#else
> +#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
> +#define VFIO_CAP_OFFSET(x) (x->resv)
> +struct vfio_info_cap_header {
> + uint16_t id;
> + uint16_t version;
> + uint32_t next;
> +};
> +#endif
> +
> +/* kernels 4.16+ can map BAR containing MSI-X table */
> +#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
> +#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
> +#else
> +#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
> +#endif
> +
> #else /* not VFIO_PRESENT */
>
> /* we don't need an actual definition, only pointer is used */
> --
> 2.17.1
Hi Anatoly,
Please fix the error check for ioctl in pci_vfio_get_region_info() from
"if (ret)" to "if (ret < 0)".
My environment reported compiler errors (with -Werror=maybe-uninitialized):
dpdk/drivers/bus/pci/linux/pci_vfio.c: In function
‘pci_vfio_map_resource_primary’:
dpdk/drivers/bus/pci/linux/pci_vfio.c:612:4: error: ‘reg’ may be used
uninitialized in this function [-Werror=maybe-uninitialized]
free(reg);
^~~~~~~~~
dpdk/drivers/bus/pci/linux/pci_vfio.c:495:2: error: ‘info’ may be used
uninitialized in this function [-Werror=maybe-uninitialized]
free(info);
^~~~~~~~~~
dpdk/drivers/bus/pci/linux/pci_vfio.c:485:27: note: ‘info’ was declared here
struct vfio_region_info *info;
Other code looks good to me.
I tested the updated patch with the above change and confirmed it
could mmap BAR on my ppc64le machine.
Thanks,
Takeshi
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [dpdk-dev] [PATCH 18.11 v2] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-08-02 6:47 ` Takeshi Yoshimura
@ 2018-08-02 8:17 ` Burakov, Anatoly
0 siblings, 0 replies; 8+ messages in thread
From: Burakov, Anatoly @ 2018-08-02 8:17 UTC (permalink / raw)
To: Takeshi Yoshimura; +Cc: dev, Jerin Jacob, thomas
On 02-Aug-18 7:47 AM, Takeshi Yoshimura wrote:
> 2018-07-31 20:28 GMT+09:00 Anatoly Burakov <anatoly.burakov@intel.com>:
>> Currently, DPDK will skip mapping some areas (or even an entire BAR)
>> if MSI-X table happens to be in them but is smaller than page size.
>>
>> Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
>> as a capability flag. Capability flags themselves are also only
>> supported since kernel 4.6 [2].
>>
>> This commit will introduce support for checking VFIO capabilities,
>> and will use it to check if we are allowed to map BARs with MSI-X
>> tables in them, along with backwards compatibility for older
>> kernels, including a workaround for a variable rename in VFIO
>> region info structure [3].
>>
>> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>> linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
>>
>> [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>> linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
>>
>> [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>> linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>>
>> Notes:
>> v2->v1:
>> - Fix pointer in pci_vfio_get_region_info
>> - Fix commit message
>>
>> drivers/bus/pci/linux/pci_vfio.c | 127 ++++++++++++++++++++---
>> lib/librte_eal/common/include/rte_vfio.h | 26 +++++
>> 2 files changed, 140 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
>> index 686386d6a..24f665c20 100644
>> --- a/drivers/bus/pci/linux/pci_vfio.c
>> +++ b/drivers/bus/pci/linux/pci_vfio.c
>> @@ -415,6 +415,88 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
>> return 0;
>> }
>>
>> +/*
>> + * region info may contain capability headers, so we need to keep reallocating
>> + * the memory until we match allocated memory size with argsz.
>> + */
>> +static int
>> +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
>> + int region)
>> +{
>> + struct vfio_region_info *ri;
>> + size_t argsz = sizeof(*ri);
>> + int ret;
>> +
>> + ri = malloc(sizeof(*ri));
>> + if (ri == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
>> + return -1;
>> + }
>> +again:
>> + memset(ri, 0, argsz);
>> + ri->argsz = argsz;
>> + ri->index = region;
>> +
>> + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
>> + if (ret) {
>> + free(ri);
>> + return ret;
>> + }
>> + if (ri->argsz != argsz) {
>> + argsz = ri->argsz;
>> + ri = realloc(ri, argsz);
>> +
>> + if (ri == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
>> + return -1;
>> + }
>> + goto again;
>> + }
>> + *info = ri;
>> +
>> + return 0;
>> +}
>> +
>> +static struct vfio_info_cap_header *
>> +pci_vfio_info_cap(struct vfio_region_info *info, int cap)
>> +{
>> + struct vfio_info_cap_header *h;
>> + size_t offset;
>> +
>> + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
>> + /* VFIO info does not advertise capabilities */
>> + return NULL;
>> + }
>> +
>> + offset = VFIO_CAP_OFFSET(info);
>> + while (offset != 0) {
>> + h = RTE_PTR_ADD(info, offset);
>> + if (h->id == cap)
>> + return h;
>> + offset = h->next;
>> + }
>> + return NULL;
>> +}
>> +
>> +static int
>> +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
>> +{
>> + struct vfio_region_info *info;
>> + int ret;
>> +
>> + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
>> + if (ret < 0)
>> + return -1;
>> +
>> + ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
>> +
>> + /* cleanup */
>> + free(info);
>> +
>> + return ret;
>> +}
>> +
>> +
>> static int
>> pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>> {
>> @@ -464,56 +546,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>> if (ret < 0) {
>> RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
>> pci_addr);
>> - goto err_vfio_dev_fd;
>> + goto err_vfio_res;
>> + }
>> + /* if we found our MSI-X BAR region, check if we can mmap it */
>> + if (vfio_res->msix_table.bar_index != -1) {
>> + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
>> + vfio_res->msix_table.bar_index);
>> + if (ret < 0) {
>> + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
>> + goto err_vfio_res;
>> + } else if (ret != 0) {
>> + /* we can map it, so we don't care where it is */
>> + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
>> + vfio_res->msix_table.bar_index = -1;
>> + }
>> }
>>
>> for (i = 0; i < (int) vfio_res->nb_maps; i++) {
>> - struct vfio_region_info reg = { .argsz = sizeof(reg) };
>> + struct vfio_region_info *reg;
>> void *bar_addr;
>>
>> - reg.index = i;
>> -
>> - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
>> - if (ret) {
>> + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
>> + if (ret < 0) {
>> RTE_LOG(ERR, EAL, " %s cannot get device region info "
>> - "error %i (%s)\n", pci_addr, errno, strerror(errno));
>> + "error %i (%s)\n", pci_addr, errno,
>> + strerror(errno));
>> goto err_vfio_res;
>> }
>>
>> /* chk for io port region */
>> ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
>> - if (ret < 0)
>> + if (ret < 0) {
>> + free(reg);
>> goto err_vfio_res;
>> - else if (ret) {
>> + } else if (ret) {
>> RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
>> i);
>> + free(reg);
>> continue;
>> }
>>
>> /* skip non-mmapable BARs */
>> - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
>> + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
>> + free(reg);
>> continue;
>> + }
>>
>> /* try mapping somewhere close to the end of hugepages */
>> if (pci_map_addr == NULL)
>> pci_map_addr = pci_find_max_end_va();
>>
>> bar_addr = pci_map_addr;
>> - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
>> + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
>>
>> maps[i].addr = bar_addr;
>> - maps[i].offset = reg.offset;
>> - maps[i].size = reg.size;
>> + maps[i].offset = reg->offset;
>> + maps[i].size = reg->size;
>> maps[i].path = NULL; /* vfio doesn't have per-resource paths */
>>
>> ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
>> if (ret < 0) {
>> RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
>> pci_addr, i, strerror(errno));
>> + free(reg);
>> goto err_vfio_res;
>> }
>>
>> dev->mem_resource[i].addr = maps[i].addr;
>> +
>> + free(reg);
>> }
>>
>> if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
>> diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
>> index 5ca13fcce..f6617e004 100644
>> --- a/lib/librte_eal/common/include/rte_vfio.h
>> +++ b/lib/librte_eal/common/include/rte_vfio.h
>> @@ -14,6 +14,8 @@
>> extern "C" {
>> #endif
>>
>> +#include <stdint.h>
>> +
>> /*
>> * determine if VFIO is present on the system
>> */
>> @@ -44,6 +46,30 @@ extern "C" {
>> #define RTE_VFIO_NOIOMMU 8
>> #endif
>>
>> +/*
>> + * capabilities are only supported on kernel 4.6+. there were also some API
>> + * changes as well, so add a macro to get cap offset.
>> + */
>> +#ifdef VFIO_REGION_INFO_FLAG_CAPS
>> +#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
>> +#define VFIO_CAP_OFFSET(x) (x->cap_offset)
>> +#else
>> +#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
>> +#define VFIO_CAP_OFFSET(x) (x->resv)
>> +struct vfio_info_cap_header {
>> + uint16_t id;
>> + uint16_t version;
>> + uint32_t next;
>> +};
>> +#endif
>> +
>> +/* kernels 4.16+ can map BAR containing MSI-X table */
>> +#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
>> +#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
>> +#else
>> +#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
>> +#endif
>> +
>> #else /* not VFIO_PRESENT */
>>
>> /* we don't need an actual definition, only pointer is used */
>> --
>> 2.17.1
>
> Hi Anatoly,
> Please fix the error check for ioctl in pci_vfio_get_region_info() from
> "if (ret)" to "if (ret < 0)".
> My environment reported compiler errors (with -Werror=maybe-uninitialized):
>
> dpdk/drivers/bus/pci/linux/pci_vfio.c: In function
> ‘pci_vfio_map_resource_primary’:
> dpdk/drivers/bus/pci/linux/pci_vfio.c:612:4: error: ‘reg’ may be used
> uninitialized in this function [-Werror=maybe-uninitialized]
> free(reg);
> ^~~~~~~~~
> dpdk/drivers/bus/pci/linux/pci_vfio.c:495:2: error: ‘info’ may be used
> uninitialized in this function [-Werror=maybe-uninitialized]
> free(info);
> ^~~~~~~~~~
> dpdk/drivers/bus/pci/linux/pci_vfio.c:485:27: note: ‘info’ was declared here
> struct vfio_region_info *info;
>
>
> Other code looks good to me.
> I tested the updated patch with the above change and confirmed it
> could mmap BAR on my ppc64le machine.
Thanks!
I'll fix it for v3.
>
> Thanks,
> Takeshi
>
--
Thanks,
Anatoly
^ permalink raw reply [flat|nested] 8+ messages in thread
* [dpdk-dev] [PATCH v3] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-07-31 11:28 ` [dpdk-dev] [PATCH 18.11 v2] " Anatoly Burakov
2018-08-02 6:47 ` Takeshi Yoshimura
@ 2018-09-20 13:11 ` Anatoly Burakov
2018-10-03 22:40 ` Thomas Monjalon
1 sibling, 1 reply; 8+ messages in thread
From: Anatoly Burakov @ 2018-09-20 13:11 UTC (permalink / raw)
To: dev; +Cc: t.yoshimura8869, thomas, jerin.jacob
Currently, DPDK will skip mapping some areas (or even an entire BAR)
if MSI-X table happens to be in them but is smaller than page size.
Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
as a capability flag. Capability flags themselves are also only
supported since kernel 4.6 [2].
This commit will introduce support for checking VFIO capabilities,
and will use it to check if we are allowed to map BARs with MSI-X
tables in them, along with backwards compatibility for older
kernels, including a workaround for a variable rename in VFIO
region info structure [3].
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
[3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
Notes:
v3->v2:
- Fix potential uninitialized value access as per Takeshi's
comments
- Fix potential memory leak on failed memory reallocation
v2->v1:
- Fix pointer in pci_vfio_get_region_info
- Fix commit message
drivers/bus/pci/linux/pci_vfio.c | 132 ++++++++++++++++++++---
lib/librte_eal/common/include/rte_vfio.h | 26 +++++
2 files changed, 145 insertions(+), 13 deletions(-)
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 686386d6a..d112b4b54 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -415,6 +415,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
return 0;
}
+/*
+ * region info may contain capability headers, so we need to keep reallocating
+ * the memory until we match allocated memory size with argsz.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+ int region)
+{
+ struct vfio_region_info *ri;
+ size_t argsz = sizeof(*ri);
+ int ret;
+
+ ri = malloc(sizeof(*ri));
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+ return -1;
+ }
+again:
+ memset(ri, 0, argsz);
+ ri->argsz = argsz;
+ ri->index = region;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
+ if (ret < 0) {
+ free(ri);
+ return ret;
+ }
+ if (ri->argsz != argsz) {
+ struct vfio_region_info *tmp;
+
+ argsz = ri->argsz;
+ tmp = realloc(ri, argsz);
+
+ if (tmp == NULL) {
+ /* realloc failed but the ri is still there */
+ free(ri);
+ RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+ return -1;
+ }
+ ri = tmp;
+ goto again;
+ }
+ *info = ri;
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+ struct vfio_info_cap_header *h;
+ size_t offset;
+
+ if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
+ /* VFIO info does not advertise capabilities */
+ return NULL;
+ }
+
+ offset = VFIO_CAP_OFFSET(info);
+ while (offset != 0) {
+ h = RTE_PTR_ADD(info, offset);
+ if (h->id == cap)
+ return h;
+ offset = h->next;
+ }
+ return NULL;
+}
+
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+ struct vfio_region_info *info;
+ int ret;
+
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
+ if (ret < 0)
+ return -1;
+
+ ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
+
+ /* cleanup */
+ free(info);
+
+ return ret;
+}
+
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
@@ -464,56 +551,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
- goto err_vfio_dev_fd;
+ goto err_vfio_res;
+ }
+ /* if we found our MSI-X BAR region, check if we can mmap it */
+ if (vfio_res->msix_table.bar_index != -1) {
+ int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+ vfio_res->msix_table.bar_index);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+ goto err_vfio_res;
+ } else if (ret != 0) {
+ /* we can map it, so we don't care where it is */
+ RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+ vfio_res->msix_table.bar_index = -1;
+ }
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ struct vfio_region_info *reg = NULL;
void *bar_addr;
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
- if (ret) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+ if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ "error %i (%s)\n", pci_addr, errno,
+ strerror(errno));
goto err_vfio_res;
}
/* chk for io port region */
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
- if (ret < 0)
+ if (ret < 0) {
+ free(reg);
goto err_vfio_res;
- else if (ret) {
+ } else if (ret) {
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
i);
+ free(reg);
continue;
}
/* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+ free(reg);
continue;
+ }
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
+ maps[i].offset = reg->offset;
+ maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
pci_addr, i, strerror(errno));
+ free(reg);
goto err_vfio_res;
}
dev->mem_resource[i].addr = maps[i].addr;
+
+ free(reg);
}
if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index 5ca13fcce..f6617e004 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -14,6 +14,8 @@
extern "C" {
#endif
+#include <stdint.h>
+
/*
* determine if VFIO is present on the system
*/
@@ -44,6 +46,30 @@ extern "C" {
#define RTE_VFIO_NOIOMMU 8
#endif
+/*
+ * capabilities are only supported on kernel 4.6+. there were also some API
+ * changes as well, so add a macro to get cap offset.
+ */
+#ifdef VFIO_REGION_INFO_FLAG_CAPS
+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
+#define VFIO_CAP_OFFSET(x) (x->cap_offset)
+#else
+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
+#define VFIO_CAP_OFFSET(x) (x->resv)
+struct vfio_info_cap_header {
+ uint16_t id;
+ uint16_t version;
+ uint32_t next;
+};
+#endif
+
+/* kernels 4.16+ can map BAR containing MSI-X table */
+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#else
+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
+#endif
+
#else /* not VFIO_PRESENT */
/* we don't need an actual definition, only pointer is used */
--
2.17.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [dpdk-dev] [PATCH v3] pci/vfio: allow mapping MSI-X BARs if kernel allows it
2018-09-20 13:11 ` [dpdk-dev] [PATCH v3] " Anatoly Burakov
@ 2018-10-03 22:40 ` Thomas Monjalon
0 siblings, 0 replies; 8+ messages in thread
From: Thomas Monjalon @ 2018-10-03 22:40 UTC (permalink / raw)
To: Anatoly Burakov; +Cc: dev, t.yoshimura8869, jerin.jacob
20/09/2018 15:11, Anatoly Burakov:
> Currently, DPDK will skip mapping some areas (or even an entire BAR)
> if MSI-X table happens to be in them but is smaller than page size.
>
> Kernels 4.16+ will allow mapping MSI-X BARs [1], and will report this
> as a capability flag. Capability flags themselves are also only
> supported since kernel 4.6 [2].
>
> This commit will introduce support for checking VFIO capabilities,
> and will use it to check if we are allowed to map BARs with MSI-X
> tables in them, along with backwards compatibility for older
> kernels, including a workaround for a variable rename in VFIO
> region info structure [3].
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6
>
> [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279
>
> [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
> linux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Applied, thanks
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2018-10-03 22:40 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-30 11:17 [dpdk-dev] [PATCH 18.11] pci/vfio: allow mapping MSI-X BARs if kernel allows it Anatoly Burakov
2018-07-31 9:38 ` Takeshi Yoshimura
2018-07-31 11:24 ` Burakov, Anatoly
2018-07-31 11:28 ` [dpdk-dev] [PATCH 18.11 v2] " Anatoly Burakov
2018-08-02 6:47 ` Takeshi Yoshimura
2018-08-02 8:17 ` Burakov, Anatoly
2018-09-20 13:11 ` [dpdk-dev] [PATCH v3] " Anatoly Burakov
2018-10-03 22:40 ` Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).