From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org, Dimon Zhao <dimon.zhao@nebula-matrix.com>,
Kyo Liu <kyo.liu@nebula-matrix.com>,
Leon Yu <leon.yu@nebula-matrix.com>,
Sam Chen <sam.chen@nebula-matrix.com>,
Christian Koue Muf <ckm@napatech.com>,
Serhii Iliushyk <sil-plv@napatech.com>,
Chaoyong He <chaoyong.he@corigine.com>,
Vijay Kumar Srivastava <vsrivast@xilinx.com>,
Bruce Richardson <bruce.richardson@intel.com>,
Tyler Retzlaff <roretzla@linux.microsoft.com>
Subject: [PATCH v1 2/8] vfio: add container device assignment API
Date: Tue, 28 Oct 2025 16:43:15 +0000 [thread overview]
Message-ID: <f16a82d56c693bec5256d3fdc666dfebff895da7.1761669439.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1761669438.git.anatoly.burakov@intel.com>
Currently, VFIO has explicit group bind APIs, but in practice none of their
users actually care about VFIO groups: the real goal of everyone calling
the VFIO group bind API is to bind devices to a particular VFIO container,
so that when `rte_vfio_setup_device()` is eventually called, VFIO will pick
up the correct container.
To remove the dependency on the group APIs, add a new "container assign
device" API call that does the same thing but does not depend on managing
VFIO group fds. All existing users of the group bind API are adjusted to
follow the new device assignment semantics.
For the NBL driver, the only reason to call the group bind API was to get
the group fd; otherwise its usage is similar to the FSLMC bus, where the
driver does everything manually. So, instead of calling the group bind API,
just open the group fd directly.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
drivers/net/nbl/nbl_common/nbl_userdev.c | 18 +++++++----
drivers/net/nbl/nbl_include/nbl_include.h | 1 +
drivers/net/ntnic/ntnic_vfio.c | 30 ++++++-----------
drivers/vdpa/ifc/ifcvf_vdpa.c | 15 ++-------
drivers/vdpa/nfp/nfp_vdpa.c | 17 +++-------
drivers/vdpa/sfc/sfc_vdpa.c | 39 ++++-------------------
drivers/vdpa/sfc/sfc_vdpa.h | 2 --
lib/eal/freebsd/eal.c | 10 ++++++
lib/eal/include/rte_vfio.h | 27 ++++++++++++++++
lib/eal/linux/eal_vfio.c | 32 +++++++++++++++++++
10 files changed, 105 insertions(+), 86 deletions(-)
diff --git a/drivers/net/nbl/nbl_common/nbl_userdev.c b/drivers/net/nbl/nbl_common/nbl_userdev.c
index 75e0e2884b..56d7d481c8 100644
--- a/drivers/net/nbl/nbl_common/nbl_userdev.c
+++ b/drivers/net/nbl/nbl_common/nbl_userdev.c
@@ -387,6 +387,13 @@ nbl_userdev_mem_event_callback(enum rte_mem_event type, const void *addr, size_t
}
}
+static int nbl_open_group_fd(int iommu_group_num)
+{
+ char path[PATH_MAX];
+ snprintf(path, sizeof(path), RTE_VFIO_GROUP_FMT, iommu_group_num);
+ return open(path, O_RDWR);
+}
+
static int nbl_mdev_map_device(struct nbl_adapter *adapter)
{
const struct rte_pci_device *pci_dev = adapter->pci_dev;
@@ -424,11 +431,12 @@ static int nbl_mdev_map_device(struct nbl_adapter *adapter)
}
NBL_LOG(DEBUG, "nbl vfio container %d", container);
- vfio_group_fd = rte_vfio_container_group_bind(container, common->iommu_group_num);
+ vfio_group_fd = nbl_open_group_fd(common->iommu_group_num);
if (vfio_group_fd < 0) {
NBL_LOG(ERR, "nbl vfio group bind failed, %d", vfio_group_fd);
goto free_container;
}
+ common->groupfd = vfio_group_fd;
/* check if the group is viable */
ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
@@ -535,7 +543,6 @@ static int nbl_mdev_map_device(struct nbl_adapter *adapter)
}
free_group:
close(vfio_group_fd);
- rte_vfio_clear_group(vfio_group_fd);
free_container:
if (container_create)
rte_vfio_container_destroy(container);
@@ -549,17 +556,14 @@ static int nbl_mdev_unmap_device(struct nbl_adapter *adapter)
close(common->devfd);
rte_mcfg_mem_read_lock();
- vfio_group_fd = rte_vfio_container_group_bind(nbl_default_container,
- common->iommu_group_num);
+ vfio_group_fd = common->groupfd;
NBL_LOG(DEBUG, "close vfio_group_fd %d", vfio_group_fd);
ret = ioctl(vfio_group_fd, VFIO_GROUP_UNSET_CONTAINER, &nbl_default_container);
if (ret)
NBL_LOG(ERR, "unset container, error %i (%s) %d",
errno, strerror(errno), ret);
nbl_group_count--;
- ret = rte_vfio_container_group_unbind(nbl_default_container, common->iommu_group_num);
- if (ret)
- NBL_LOG(ERR, "vfio container group unbind failed %d", ret);
+ close(vfio_group_fd);
if (!nbl_group_count) {
rte_mem_event_callback_unregister(NBL_USERDEV_EVENT_CLB_NAME, NULL);
nbl_userdev_dma_free();
diff --git a/drivers/net/nbl/nbl_include/nbl_include.h b/drivers/net/nbl/nbl_include/nbl_include.h
index 6423ada176..74c6092ddf 100644
--- a/drivers/net/nbl/nbl_include/nbl_include.h
+++ b/drivers/net/nbl/nbl_include/nbl_include.h
@@ -130,6 +130,7 @@ struct nbl_common_info {
u16 vsi_id;
u16 instance_id;
int devfd;
+ int groupfd;
int eventfd;
int ifindex;
int iommu_group_num;
diff --git a/drivers/net/ntnic/ntnic_vfio.c b/drivers/net/ntnic/ntnic_vfio.c
index 187829d287..478ad25b11 100644
--- a/drivers/net/ntnic/ntnic_vfio.c
+++ b/drivers/net/ntnic/ntnic_vfio.c
@@ -29,7 +29,6 @@ nt_vfio_vf_num(const struct rte_pci_device *pdev)
/* Internal API */
struct vfio_dev {
int container_fd;
- int group_fd;
int dev_fd;
uint64_t iova_addr;
};
@@ -51,7 +50,6 @@ nthw_vfio_setup(struct rte_pci_device *dev)
{
int ret;
char devname[RTE_DEV_NAME_MAX_LEN] = { 0 };
- int iommu_group_num;
int vf_num;
struct vfio_dev *vfio;
@@ -67,14 +65,9 @@ nthw_vfio_setup(struct rte_pci_device *dev)
}
vfio->dev_fd = -1;
- vfio->group_fd = -1;
vfio->iova_addr = START_VF_IOVA;
rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
- ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname, &iommu_group_num);
- if (ret <= 0)
- return -1;
-
if (vf_num == 0) {
/* use default container for pf0 */
vfio->container_fd = RTE_VFIO_DEFAULT_CONTAINER_FD;
@@ -87,17 +80,14 @@ nthw_vfio_setup(struct rte_pci_device *dev)
"VFIO device setup failed. VFIO container creation failed.");
return -1;
}
- }
+ ret = rte_vfio_container_assign_device(vfio->container_fd,
+ rte_pci_get_sysfs_path(), devname);
+ if (ret < 0) {
+ NT_LOG(ERR, NTNIC,
+ "VFIO device setup failed. Assign device to container failed.");
+ goto err;
+ }
- vfio->group_fd = rte_vfio_container_group_bind(vfio->container_fd, iommu_group_num);
-
- if (vfio->group_fd < 0) {
- NT_LOG(ERR, NTNIC,
- "VFIO device setup failed. VFIO container group bind failed.");
- goto err;
- }
-
- if (vf_num > 0) {
if (rte_pci_map_device(dev)) {
NT_LOG(ERR, NTNIC,
"Map VFIO device failed. is the vfio-pci driver loaded?");
@@ -107,10 +97,8 @@ nthw_vfio_setup(struct rte_pci_device *dev)
vfio->dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
- NT_LOG(DBG, NTNIC,
- "%s: VFIO id=%d, dev_fd=%d, container_fd=%d, group_fd=%d, iommu_group_num=%d",
- dev->name, vf_num, vfio->dev_fd, vfio->container_fd, vfio->group_fd,
- iommu_group_num);
+ NT_LOG(DBG, NTNIC, "%s: VFIO id=%d, dev_fd=%d, container_fd=%d",
+ dev->name, vf_num, vfio->dev_fd, vfio->container_fd);
return vf_num;
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f319d455ba..6f1c050787 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -174,28 +174,19 @@ ifcvf_vfio_setup(struct ifcvf_internal *internal)
{
struct rte_pci_device *dev = internal->pdev;
char devname[RTE_DEV_NAME_MAX_LEN] = {0};
- int iommu_group_num;
- int i, ret;
+ int i;
internal->vfio_dev_fd = -1;
- internal->vfio_group_fd = -1;
internal->vfio_container_fd = -1;
rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
- ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname,
- &iommu_group_num);
- if (ret <= 0) {
- DRV_LOG(ERR, "%s failed to get IOMMU group", devname);
- return -1;
- }
internal->vfio_container_fd = rte_vfio_container_create();
if (internal->vfio_container_fd < 0)
return -1;
- internal->vfio_group_fd = rte_vfio_container_group_bind(
- internal->vfio_container_fd, iommu_group_num);
- if (internal->vfio_group_fd < 0)
+ if (rte_vfio_container_assign_device(internal->vfio_container_fd,
+ rte_pci_get_sysfs_path(), devname) < 0)
goto err;
if (rte_pci_map_device(dev))
diff --git a/drivers/vdpa/nfp/nfp_vdpa.c b/drivers/vdpa/nfp/nfp_vdpa.c
index f4fd5c92ec..4885fa5cbc 100644
--- a/drivers/vdpa/nfp/nfp_vdpa.c
+++ b/drivers/vdpa/nfp/nfp_vdpa.c
@@ -122,33 +122,26 @@ nfp_vdpa_vfio_setup(struct nfp_vdpa_dev *device)
rte_pci_unmap_device(pci_dev);
rte_pci_device_name(&pci_dev->addr, dev_name, RTE_DEV_NAME_MAX_LEN);
- ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), dev_name,
- &device->iommu_group);
- if (ret <= 0)
- return -1;
device->vfio_container_fd = rte_vfio_container_create();
if (device->vfio_container_fd < 0)
return -1;
- device->vfio_group_fd = rte_vfio_container_group_bind(
- device->vfio_container_fd, device->iommu_group);
- if (device->vfio_group_fd < 0)
+ ret = rte_vfio_container_assign_device(device->vfio_container_fd,
+ rte_pci_get_sysfs_path(), dev_name);
+ if (ret < 0)
goto container_destroy;
- DRV_VDPA_LOG(DEBUG, "The container_fd=%d, group_fd=%d.",
- device->vfio_container_fd, device->vfio_group_fd);
+ DRV_VDPA_LOG(DEBUG, "container_fd=%d", device->vfio_container_fd);
ret = rte_pci_map_device(pci_dev);
if (ret != 0)
- goto group_unbind;
+ goto container_destroy;
device->vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
return 0;
-group_unbind:
- rte_vfio_container_group_unbind(device->vfio_container_fd, device->iommu_group);
container_destroy:
rte_vfio_container_destroy(device->vfio_container_fd);
diff --git a/drivers/vdpa/sfc/sfc_vdpa.c b/drivers/vdpa/sfc/sfc_vdpa.c
index eda111954f..99b4ced3f4 100644
--- a/drivers/vdpa/sfc/sfc_vdpa.c
+++ b/drivers/vdpa/sfc/sfc_vdpa.c
@@ -80,22 +80,12 @@ sfc_vdpa_vfio_setup(struct sfc_vdpa_adapter *sva)
goto fail_container_create;
}
- rc = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), dev_name,
- &sva->iommu_group_num);
- if (rc <= 0) {
- sfc_vdpa_err(sva, "failed to get IOMMU group for %s : %s",
- dev_name, rte_strerror(-rc));
- goto fail_get_group_num;
- }
-
- sva->vfio_group_fd =
- rte_vfio_container_group_bind(sva->vfio_container_fd,
- sva->iommu_group_num);
- if (sva->vfio_group_fd < 0) {
- sfc_vdpa_err(sva,
- "failed to bind IOMMU group %d to container %d",
- sva->iommu_group_num, sva->vfio_container_fd);
- goto fail_group_bind;
+ rc = rte_vfio_container_assign_device(sva->vfio_container_fd,
+ rte_pci_get_sysfs_path(), dev_name);
+ if (rc < 0) {
+ sfc_vdpa_err(sva, "failed to assign device %s to container %d",
+ dev_name, sva->vfio_container_fd);
+ goto fail_device_assign;
}
if (rte_pci_map_device(dev) != 0) {
@@ -109,15 +99,7 @@ sfc_vdpa_vfio_setup(struct sfc_vdpa_adapter *sva)
return 0;
fail_pci_map_device:
- if (rte_vfio_container_group_unbind(sva->vfio_container_fd,
- sva->iommu_group_num) != 0) {
- sfc_vdpa_err(sva,
- "failed to unbind IOMMU group %d from container %d",
- sva->iommu_group_num, sva->vfio_container_fd);
- }
-
-fail_group_bind:
-fail_get_group_num:
+fail_device_assign:
if (rte_vfio_container_destroy(sva->vfio_container_fd) != 0) {
sfc_vdpa_err(sva, "failed to destroy container %d",
sva->vfio_container_fd);
@@ -132,13 +114,6 @@ sfc_vdpa_vfio_teardown(struct sfc_vdpa_adapter *sva)
{
rte_pci_unmap_device(sva->pdev);
- if (rte_vfio_container_group_unbind(sva->vfio_container_fd,
- sva->iommu_group_num) != 0) {
- sfc_vdpa_err(sva,
- "failed to unbind IOMMU group %d from container %d",
- sva->iommu_group_num, sva->vfio_container_fd);
- }
-
if (rte_vfio_container_destroy(sva->vfio_container_fd) != 0) {
sfc_vdpa_err(sva,
"failed to destroy container %d",
diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
index 2b843e563d..99a81fd1b0 100644
--- a/drivers/vdpa/sfc/sfc_vdpa.h
+++ b/drivers/vdpa/sfc/sfc_vdpa.h
@@ -70,10 +70,8 @@ struct sfc_vdpa_adapter {
sfc_vdpa_filter_t filters;
- int vfio_group_fd;
int vfio_dev_fd;
int vfio_container_fd;
- int iommu_group_num;
struct sfc_vdpa_ops_data *ops_data;
};
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index 6215245ad5..fe2b017bba 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -937,3 +937,13 @@ rte_vfio_container_dma_unmap(__rte_unused int container_fd,
rte_errno = ENOTSUP;
return -1;
}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_vfio_container_assign_device, 26.02)
+int
+rte_vfio_container_assign_device(__rte_unused int vfio_container_fd,
+ __rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr)
+{
+ rte_errno = ENOTSUP;
+ return -1;
+}
diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
index 80951517fa..2eba736249 100644
--- a/lib/eal/include/rte_vfio.h
+++ b/lib/eal/include/rte_vfio.h
@@ -251,6 +251,33 @@ rte_vfio_container_create(void);
int
rte_vfio_container_destroy(int container_fd);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Assign a device to a VFIO container.
+ *
+ * Doing so will cause `rte_vfio_setup_device()` to set up the device with the VFIO container
+ * specified in this assign operation.
+ *
+ * This function is only relevant on Linux.
+ *
+ * @param vfio_container_fd
+ * VFIO container file descriptor.
+ * @param sysfs_base
+ * Sysfs path prefix.
+ * @param dev_addr
+ * Device identifier.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure, rte_errno is set.
+ */
+__rte_experimental
+int
+rte_vfio_container_assign_device(int vfio_container_fd, const char *sysfs_base,
+ const char *dev_addr);
+
/**
* Bind a IOMMU group to a container.
*
diff --git a/lib/eal/linux/eal_vfio.c b/lib/eal/linux/eal_vfio.c
index 45c1354390..3b3290927f 100644
--- a/lib/eal/linux/eal_vfio.c
+++ b/lib/eal/linux/eal_vfio.c
@@ -2128,6 +2128,38 @@ rte_vfio_container_destroy(int container_fd)
return 0;
}
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_vfio_container_assign_device, 26.02)
+int
+rte_vfio_container_assign_device(int vfio_container_fd, const char *sysfs_base,
+ const char *dev_addr)
+{
+ int iommu_group_num;
+ int ret;
+
+ ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
+ if (ret < 0) {
+ EAL_LOG(ERR, "Cannot get IOMMU group number for device %s",
+ dev_addr);
+ return -1;
+ } else if (ret == 0) {
+ EAL_LOG(ERR,
+ "Device %s is not assigned to any IOMMU group",
+ dev_addr);
+ return -1;
+ }
+
+ ret = rte_vfio_container_group_bind(vfio_container_fd,
+ iommu_group_num);
+ if (ret < 0) {
+ EAL_LOG(ERR,
+ "Cannot bind IOMMU group %d for device %s",
+ iommu_group_num, dev_addr);
+ return -1;
+ }
+
+ return 0;
+}
+
RTE_EXPORT_SYMBOL(rte_vfio_container_group_bind)
int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
--
2.47.3
next prev parent reply other threads:[~2025-10-28 16:43 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-28 16:43 [PATCH v1 0/8] Support VFIO cdev API in DPDK Anatoly Burakov
2025-10-28 16:43 ` [PATCH v1 1/8] uapi: update to v6.17 and add iommufd.h Anatoly Burakov
2025-10-28 16:43 ` Anatoly Burakov [this message]
2025-10-28 16:43 ` [PATCH v1 3/8] vhost: remove group-related API from drivers Anatoly Burakov
2025-10-28 16:43 ` [PATCH v1 4/8] vfio: do not setup the device on get device info Anatoly Burakov
2025-10-28 16:43 ` [PATCH v1 5/8] vfio: cleanup and refactor Anatoly Burakov
2025-10-28 16:43 ` [PATCH v1 6/8] vfio: introduce cdev mode Anatoly Burakov
2025-10-28 16:43 ` [PATCH v1 7/8] doc: deprecate VFIO group-based APIs Anatoly Burakov
2025-10-28 16:43 ` [PATCH v1 8/8] vfio: deprecate group-based API Anatoly Burakov
2025-10-29 9:50 ` 回复:[PATCH v1 0/8] Support VFIO cdev API in DPDK Dimon
2025-10-29 12:03 ` Burakov, Anatoly
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f16a82d56c693bec5256d3fdc666dfebff895da7.1761669439.git.anatoly.burakov@intel.com \
--to=anatoly.burakov@intel.com \
--cc=bruce.richardson@intel.com \
--cc=chaoyong.he@corigine.com \
--cc=ckm@napatech.com \
--cc=dev@dpdk.org \
--cc=dimon.zhao@nebula-matrix.com \
--cc=kyo.liu@nebula-matrix.com \
--cc=leon.yu@nebula-matrix.com \
--cc=roretzla@linux.microsoft.com \
--cc=sam.chen@nebula-matrix.com \
--cc=sil-plv@napatech.com \
--cc=vsrivast@xilinx.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).