DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 00/15] add virtio_blk device support to vdpa/ifc
@ 2022-01-25  6:47 Andy Pei
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                   ` (14 more replies)
  0 siblings, 15 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the net and blk devices have a lot of similarities, part of the
existing vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented for each.
An example is added to the vdpa example application to support the
virtio_blk device.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only under the VHOST_USER_SET_VRING_CALL msg.

Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  42 ++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 534 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 +
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 152 +++++++++++
 examples/vdpa/vdpa_blk_compact.h | 118 +++++++++
 examples/vdpa/vhost_user.h       | 190 ++++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  15 ++
 usertools/dpdk-devbind.py        |   8 +
 12 files changed, 1053 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (6 more replies)
  2022-01-25  6:47 ` [PATCH 02/15] vhost: add vdpa ops " Andy Pei
                   ` (13 subsequent siblings)
  14 siblings, 7 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code, and distinguish blk and net devices by
pci_device_id.
Blk and net devices are implemented with their proper features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 96 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..48056d1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,14 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/**
+** vdpa device info includes device features and device operations.
+**/
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1179,50 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/**
+		** Transitional devices: use the PCI subsystem device id as
+		** virtio device id, same as legacy driver always did.
+		**/
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/**
+		** Modern devices: simply use PCI device id,
+		** but start from 0x1040.
+		**/
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1181,6 +1234,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1230,13 +1284,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1248,7 +1313,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1316,6 +1382,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 02/15] vhost: add vdpa ops for blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  6:47 ` [PATCH 02/15] vhost: add vdpa ops " Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 88 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 48056d1..965baa2 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1093,6 +1093,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1207,6 +1211,88 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	__u64 capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %lu",
+			len, sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/**
+	** cannot read 64-bit register in one attempt,
+	** so read byte by byte.
+	**/
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (__u64)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1219,7 +1305,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (2 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the blk we need to relay all the cmd of each queue.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 48 +++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 965baa2..9729490 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -374,24 +374,50 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/**
+				** For the net we only need to relay rx queue,
+				** which will change the mem of VM.
+				**/
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/**
+				** For the blk we need to relay all the read cmd
+				** of each queue
+				**/
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (3 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 06/15] example/vdpa:add vdpa blk support in example Andy Pei
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, Jin Yu

Enable virtio blk sw live migration: relay the callfd and log the dirty pages.
In this version we ignore the write cmd and still mark it dirty. Maybe we can improve it later.

Signed-off-by: Jin Yu <jin.yu@intel.com>
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 130 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9729490..1f832a3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -334,10 +334,68 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/**
+		** IFCVF marks dirty memory pages for only packet buffer,
+		** SW helps to mark the used ring as dirty after device stops.
+		**/
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -665,15 +723,18 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/**
+		** NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		** BLK: relay every queue
+		**/
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -692,7 +753,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -717,8 +781,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -730,6 +796,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -780,17 +848,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -878,7 +965,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 06/15] example/vdpa:add vdpa blk support in example
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (4 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 07/15] usertools: add support for virtio blk device Andy Pei
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 ++
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 152 +++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 118 ++++++++++++++++++++++++
 examples/vdpa/vhost_user.h       | 190 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 470 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..3fa3d3a 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
 	.new_device = new_device,
 	.destroy_device = destroy_device,
+	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+		< 0)
+		rte_exit(EXIT_FAILURE,
+			"set vhost blk driver features and protocal features failed: %s\n",
+			socket_path);
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
         'main.c',
+	'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 0000000..7310ebb
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,152 @@
+/*
+**    INTEL CONFIDENTIAL
+**
+**    Copyright (c) Intel Corporation.
+**    All rights reserved.
+**
+**    The source code contained or described herein and all documents related
+**    to the source code ("Material") are owned by Intel Corporation or its
+**    suppliers or licensors.  Title to the Material remains with Intel
+**    Corporation or its suppliers and licensors.  The Material contains trade
+**    secrets and proprietary and confidential information of Intel or its
+**    suppliers and licensors.  The Material is protected by worldwide
+**    copyright and trade secret laws and treaty provisions.  No part of the
+**    Material may be used, copied, reproduced, modified, published, uploaded,
+**    posted, transmitted, distributed, or disclosed in any way without Intel's
+**    prior express written permission.
+**
+**    No license under any patent, copyright, trade secret or other
+**    intellectual property right is granted to or conferred upon you by
+**    disclosure or delivery of the Materials, either expressly, by
+**    implication, inducement, estoppel or otherwise.  Any license under such
+**    intellectual property rights must be express and approved by Intel in
+**    writing.
+*/
+
+/**
+** @file
+**
+** Block device specific vhost lib
+**/
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <vdpa_driver.h>
+#include <rte_vhost.h>
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG	24
+#define VHOST_USER_SET_CONFIG	25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Pre-message hook for the vhost-user block config requests.
+ *
+ * Forwards VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG to the
+ * get_config/set_config ops of the vDPA device bound to vid. Any other
+ * request is reported as not handled.
+ *
+ * The payload size is supplied by the vhost-user peer, so it is checked
+ * against VHOST_USER_MAX_CONFIG_SIZE (the size of the region buffer) before
+ * the buffer is handed to the driver.
+ *
+ * Returns:
+ *  RTE_VHOST_MSG_RESULT_ERR when no vDPA device is bound to vid,
+ *  RTE_VHOST_MSG_RESULT_REPLY for GET_CONFIG (msg->size is set to 0 when the
+ *  config could not be read),
+ *  RTE_VHOST_MSG_RESULT_OK or RTE_VHOST_MSG_RESULT_ERR for SET_CONFIG,
+ *  RTE_VHOST_MSG_RESULT_NOT_HANDLED for every other request.
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+	struct VhostUserMsg *msg = _msg;
+	struct rte_vdpa_device *vdev;
+	int rc = 0;
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	fprintf(stderr, "msg is %u\n", (unsigned int)msg->request.master);
+	switch (msg->request.master) {
+	case VHOST_USER_GET_CONFIG:
+		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+		if (msg->payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+			/* never let the driver write past the region buffer */
+			fprintf(stderr, "get_config() size %u is too large!\n",
+				(unsigned int)msg->payload.cfg.size);
+			msg->size = 0;
+		} else if (vdev->ops->get_config == NULL) {
+			fprintf(stdout, "get_config() function is invalid!\n");
+			/* a zero-length payload signals the error to the peer */
+			msg->size = 0;
+		} else {
+			fprintf(stdout, "get_config() function is valid!\n");
+			rc = vdev->ops->get_config(vid,
+						   msg->payload.cfg.region,
+						   msg->payload.cfg.size);
+			if (rc != 0) {
+				msg->size = 0;
+				fprintf(stdout, "get_config() return error!\n");
+			}
+		}
+
+		return RTE_VHOST_MSG_RESULT_REPLY;
+	case VHOST_USER_SET_CONFIG:
+		fprintf(stdout,
+			"read message VHOST_USER_SET_CONFIG\n");
+
+		if (msg->payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+			/* never let the driver read past the region buffer */
+			fprintf(stderr, "set_config() size %u is too large!\n",
+				(unsigned int)msg->payload.cfg.size);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		/* a device without a set_config op is treated as success */
+		if (vdev->ops->set_config) {
+			rc = vdev->ops->set_config(vid,
+				msg->payload.cfg.region,
+				msg->payload.cfg.offset,
+				msg->payload.cfg.size,
+				msg->payload.cfg.flags);
+		}
+
+		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+	default:
+		break;
+	}
+
+	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+/*
+ * External vhost-user message hooks installed on every new connection.
+ * Only the pre-message hook is used; it is file-local and never modified.
+ */
+static const struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
+	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
+	.post_msg_handle = NULL,
+};
+
+/*
+ * Register the block-device message hooks for the vhost device vid.
+ * Returns 0 on success and -1 when the registration is rejected.
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid)
+{
+	if (rte_vhost_extern_callback_register(vid, &g_blk_extern_vhost_ops,
+					       NULL) != 0) {
+		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
+		return -1;
+	}
+
+	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
+
+	return 0;
+}
+
+
+/*
+ * Configure the vhost-user socket at path for a vDPA block device.
+ *
+ * Devices whose ops have no get_config callback (vDPA net) are left
+ * untouched. For the others the virtio-blk feature set is installed, the
+ * unsupported bits are masked out, and the CONFIG and LOG_SHMFD protocol
+ * features are added to whatever the socket already advertises.
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument, and -1 when one of the
+ * rte_vhost_driver_*() calls fails.
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev)
+{
+	uint64_t protocol_features = 0;
+
+	if (!path) {
+		fprintf(stderr, "path is NULL.\n");
+		return -EINVAL;
+	}
+
+	if (!vdev) {
+		fprintf(stderr, "vdev is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* vdpa net does not have the get_config */
+	if (!vdev->ops->get_config)
+		return 0;
+
+	if (rte_vhost_driver_set_features(path,
+			SPDK_VHOST_BLK_FEATURES_BASE) < 0) {
+		fprintf(stderr, "set features failed: %s\n", path);
+		return -1;
+	}
+
+	if (rte_vhost_driver_disable_features(path,
+			SPDK_VHOST_BLK_DISABLED_FEATURES) < 0) {
+		fprintf(stderr, "disable features failed: %s\n", path);
+		return -1;
+	}
+
+	if (rte_vhost_driver_get_protocol_features(path,
+			&protocol_features) < 0) {
+		fprintf(stderr, "get protocol features failed: %s\n", path);
+		return -1;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	if (rte_vhost_driver_set_protocol_features(path,
+			protocol_features) < 0) {
+		fprintf(stderr, "set protocol features failed: %s\n", path);
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..94bd9c1
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,118 @@
+/*
+**    INTEL CONFIDENTIAL
+**
+**    Copyright (c) Intel Corporation.
+**    All rights reserved.
+**
+**    The source code contained or described herein and all documents related
+**    to the source code ("Material") are owned by Intel Corporation or its
+**    suppliers or licensors.  Title to the Material remains with Intel
+**    Corporation or its suppliers and licensors.  The Material contains trade
+**    secrets and proprietary and confidential information of Intel or its
+**    suppliers and licensors.  The Material is protected by worldwide
+**    copyright and trade secret laws and treaty provisions.  No part of the
+**    Material may be used, copied, reproduced, modified, published, uploaded,
+**    posted, transmitted, distributed, or disclosed in any way without Intel's
+**    prior express written permission.
+**
+**    No license under any patent, copyright, trade secret or other
+**    intellectual property right is granted to or conferred upon you by
+**    disclosure or delivery of the Materials, either expressly, by
+**    implication, inducement, estoppel or otherwise.  Any license under such
+**    intellectual property rights must be express and approved by Intel in
+**    writing.
+*/
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+/* Generic virtio/vhost feature bits used as the base of the blk feature set */
+#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+/* Generic bits that are part of SPDK_VHOST_FEATURES but get disabled again */
+#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+/*
+ * Feature set installed on the socket for a blk device: the generic bits
+ * plus the virtio-blk bits. Several of these bits also appear in
+ * SPDK_VHOST_BLK_DISABLED_FEATURES below.
+ */
+#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/*
+ * Vhost-blk support protocol features.
+ * NOTE(review): vdpa_blk_compact.c does not reference this macro; it ORs in
+ * the CONFIG and LOG_SHMFD bits directly - confirm which set is intended.
+ */
+#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set the virtio-blk features and the vhost-user protocol features on a
+ * vhost-user socket that serves a vDPA block device. Nothing is changed
+ * when the device has no get_config op (vDPA net device).
+ *
+ * @param path
+ *  socket path
+ * @param vdev
+ *  vDPA device attached to the socket
+ * @return
+ *  0 on success, negative value on failure (-EINVAL when vdev is NULL)
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Install external hook to handle vhost user block message
+ * (VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG).
+ *
+ * @param vid
+ *  vhost device id
+ * @return
+ *  0 on success, -1 when the hook could not be registered
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid);
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..b9e1be1
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,190 @@
+/*
+**    INTEL CONFIDENTIAL
+**
+**    Copyright (c) Intel Corporation.
+**    All rights reserved.
+**
+**    The source code contained or described herein and all documents related
+**    to the source code ("Material") are owned by Intel Corporation or its
+**    suppliers or licensors.  Title to the Material remains with Intel
+**    Corporation or its suppliers and licensors.  The Material contains trade
+**    secrets and proprietary and confidential information of Intel or its
+**    suppliers and licensors.  The Material is protected by worldwide
+**    copyright and trade secret laws and treaty provisions.  No part of the
+**    Material may be used, copied, reproduced, modified, published, uploaded,
+**    posted, transmitted, distributed, or disclosed in any way without Intel's
+**    prior express written permission.
+**
+**    No license under any patent, copyright, trade secret or other
+**    intellectual property right is granted to or conferred upon you by
+**    disclosure or delivery of the Materials, either expressly, by
+**    implication, inducement, estoppel or otherwise.  Any license under such
+**    intellectual property rights must be express and approved by Intel in
+**    writing.
+*/
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+/*
+ * Request codes carried in VhostUserMsg.request.master.
+ * Codes 23 to 25 are not enumerated here; vdpa_blk_compact.c defines
+ * VHOST_USER_GET_CONFIG (24) and VHOST_USER_SET_CONFIG (25) as macros.
+ */
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset; /* forwarded to the set_config op */
+	uint32_t size;   /* length handed to get_config/set_config with region */
+	uint32_t flags;  /* forwarded to the set_config op */
+	/* config data buffer, VHOST_USER_MAX_CONFIG_SIZE bytes */
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+/*
+ * One vhost-user message: request code, flags, payload size and a payload
+ * union with one member per request family, followed by the fds array and
+ * fd_num. The struct is declared packed.
+ */
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg; /* get/set config payload */
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 07/15] usertools: add support for virtio blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (5 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (6 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 07/15] usertools: add support for virtio blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1f832a3..eff6ff3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1384,6 +1384,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	/*
+	 * Placeholder for the set_vring_state op of the blk device: every
+	 * argument is ignored and success is always reported.
+	 */
+	RTE_SET_USED(state);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(vid);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1401,7 +1411,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (7 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index eff6ff3..0b4b77f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1447,6 +1447,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	__u64 capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1513,6 +1516,32 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/**
+		** cannot read 64-bit register in one attempt,
+		** so read byte by byte.
+		**/
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (__u64)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (8 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Originally, max_queues was always set to IFCVF_MAX_QUEUES.
Now max_queues is the minimum of IFCVF_MAX_QUEUES and the hardware num_queues.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0b4b77f..f092aca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1542,6 +1542,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (9 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll and relay config space change interrupts.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 113 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f092aca..2552375 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -622,6 +624,108 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+/*
+ * Report a config space change for the vhost device recorded in internal
+ * through rte_vhost_slave_config_change(); a failure is only logged.
+ */
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	if (rte_vhost_slave_config_change(internal->vid, 1) != 0)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+/*
+ * Thread body that relays config space change interrupts.
+ *
+ * Creates an epoll instance, adds the interrupt fd of the PCI device to it
+ * and then loops forever: each time the fd becomes readable its counter is
+ * drained and virtio_interrupt_handler() is called. The epoll fd is stored
+ * in internal->csc_fd once it is fully set up so that unset_intr_relay()
+ * can close it; if the setup fails it is closed here instead.
+ *
+ * arg is the struct ifcvf_internal of the device. Always returns NULL, and
+ * only returns on an unrecoverable error.
+ */
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int intr_fd;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	intr_fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = intr_fd;
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD, intr_fd, &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		/* not published in internal->csc_fd yet, so close it here */
+		close(csc_fd);
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail");
+			return NULL;
+		}
+		if (csc_val == 0)
+			continue;
+
+		/* csc_val > 0: drain the interrupt fd before relaying */
+		nbytes = read(csc_event.data.fd, &buf, sizeof(buf));
+		if (nbytes < 0) {
+			if (errno == EINTR || errno == EWOULDBLOCK)
+				continue;
+			DRV_LOG(ERR, "Error reading from file descriptor %d: %s",
+				csc_event.data.fd,
+				strerror(errno));
+			return NULL;
+		}
+		if (nbytes == 0) {
+			DRV_LOG(ERR, "Read nothing from file descriptor %d",
+				csc_event.data.fd);
+			continue;
+		}
+
+		virtio_interrupt_handler(internal);
+	}
+}
+
+/*
+ * Start the thread that relays config space change interrupts.
+ * Returns 0 on success and -1 when the thread could not be created.
+ */
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	/*
+	 * The relay thread stores its epoll fd here once it is set up. Mark
+	 * it invalid first so that unset_intr_relay() never closes an
+	 * unrelated descriptor if the thread fails before that point.
+	 */
+	internal->csc_fd = -1;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create intr relay pthread.");
+		/* the thread id is undefined after a failed pthread_create */
+		internal->intr_tid = 0;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Stop the interrupt relay: cancel and join the relay thread if one is
+ * recorded, then close the epoll fd it published. Always returns 0.
+ */
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *thread_ret;
+	int epoll_fd = internal->csc_fd;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &thread_ret);
+		/* the thread may have published its fd before being joined */
+		epoll_fd = internal->csc_fd;
+	}
+	internal->intr_tid = 0;
+
+	if (epoll_fd >= 0)
+		close(epoll_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -648,10 +752,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -969,6 +1079,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (10 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 2552375..546f9bd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1621,11 +1621,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (11 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-01-25  6:47 ` [PATCH 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (12 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 546f9bd..ff233bc 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -359,23 +359,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -766,7 +775,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH 15/15] vhost: make sure each queue callfd is configured
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (13 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

While the vhost data path is being set up, QEMU first creates one call fd
and later creates another. The final call fd is the one used to relay
notifications. In the original code, dev_conf runs once the kick fd is set,
so it configures the first call fd. Even though the actual call fd is set
afterwards, the data path does not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..0be879a 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,27 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR,
 					 "Failed to configure vDPA device\n");
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/**
+		** when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		** close the device and config the device again,
+		** make sure the call fd of each queue is configed correctly.
+		**/
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-25  9:37   ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (14 more replies)
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (5 subsequent siblings)
  6 siblings, 15 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc
driver is re-used. Virtio net and blk devices are distinguished by device
id, and device-specific features and ops are implemented.
An example is added to examples/vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the
vhost lib: the dev_conf op is performed only on the
VHOST_USER_SET_VRING_CALL message.

v2:
 Fix some coding style issue.

Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 520 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 +
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 +++++++++
 examples/vdpa/vhost_user.h       | 189 ++++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  14 ++
 usertools/dpdk-devbind.py        |   8 +
 12 files changed, 1034 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 02/15] vhost: add vdpa ops for " Andy Pei
                       ` (13 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their own features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..96b67dd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and devcic operation. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1177,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1181,6 +1230,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1230,13 +1280,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1248,7 +1309,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1316,6 +1378,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 02/15] vhost: add vdpa ops for blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  9:37     ` [PATCH v2 02/15] vhost: add vdpa ops for " Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 96b67dd..57fdd2c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1091,6 +1091,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1203,6 +1207,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	__u64 capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %lu",
+			len, sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (__u64)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1215,7 +1298,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
                       ` (10 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For blk devices we need to relay all the commands of each queue.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 57fdd2c..ef5b36c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 06/15] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Enable virtio blk software live migration: relay the callfd and log dirty pages.
In this version we ignore the write cmd and still mark it dirty.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ef5b36c..14bc5c8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 06/15] example/vdpa:add vdpa blk support in example
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 07/15] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to the vdpa example.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 ++
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
 examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..924ad7b 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
 	.new_device = new_device,
 	.destroy_device = destroy_device,
+	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+		< 0)
+		rte_exit(EXIT_FAILURE,
+			"set vhost blk driver features and protocol features failed: %s\n",
+			socket_path);
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
         'main.c',
+	'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 0000000..0c4d3ee
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,150 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+/* @file
+ *
+ * Block device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <vdpa_driver.h>
+#include <rte_vhost.h>
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG	24
+#define VHOST_USER_SET_CONFIG	25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Pre-handler for vhost-user blk config messages: GET_CONFIG/SET_CONFIG are
+ * forwarded to the vDPA driver ops of the device bound to vid, anything else
+ * returns RTE_VHOST_MSG_RESULT_NOT_HANDLED. cfg.size is sent by the frontend,
+ * so it is checked against the size of cfg.region[] before the ops use it.
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+	struct VhostUserMsg *msg = _msg;
+	struct vhost_user_config *cfg = &msg->payload.cfg;
+	struct rte_vdpa_device *vdev;
+	int rc = -1;
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	fprintf(stderr, "msg is %u\n", (unsigned int)msg->request.master);
+	switch (msg->request.master) {
+	case VHOST_USER_GET_CONFIG:
+		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+		if (cfg->size > VHOST_USER_MAX_CONFIG_SIZE) {
+			fprintf(stderr, "get_config() size is too large!\n");
+		} else if (vdev->ops->get_config == NULL) {
+			fprintf(stdout, "get_config() function is invalid!\n");
+		} else {
+			fprintf(stdout, "get_config() function is valid!\n");
+			rc = vdev->ops->get_config(vid, cfg->region, cfg->size);
+			if (rc != 0)
+				fprintf(stdout, "get_config() return error!\n");
+		}
+
+		/* a zero-sized reply payload reports the failure to the peer */
+		if (rc != 0)
+			msg->size = 0;
+		return RTE_VHOST_MSG_RESULT_REPLY;
+	case VHOST_USER_SET_CONFIG:
+		fprintf(stdout, "read message VHOST_USER_SET_CONFIG\n");
+
+		if (cfg->size > VHOST_USER_MAX_CONFIG_SIZE) {
+			fprintf(stderr, "set_config() size is too large!\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		/* a driver without set_config silently accepts the write */
+		rc = 0;
+		if (vdev->ops->set_config)
+			rc = vdev->ops->set_config(vid, cfg->region,
+				cfg->offset, cfg->size, cfg->flags);
+
+		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+	default:
+		return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+	}
+}
+
+/* Hooks only the pre-handler; .post_msg_handle is left implicitly NULL. */
+static const struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
+	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
+};
+
+/*
+ * Install the blk extern message hooks on the vhost device vid.
+ * Returns 0 on success, -1 when the registration fails.
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid)
+{
+	if (rte_vhost_extern_callback_register(vid, &g_blk_extern_vhost_ops,
+					       NULL) != 0) {
+		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
+		return -1;
+	}
+	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
+	return 0;
+}
+
+
+/* Enable blk features and CONFIG/LOG_SHMFD on socket path; 0 on success. */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev)
+{
+	uint64_t protocol_features = 0;
+
+	if (!vdev) {
+		fprintf(stdout, "vdev is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* vdpa net does not have the get_config */
+	if (!vdev->ops->get_config)
+		return 0;
+
+	/* fail fast: never write protocol bits built from a failed read */
+	if (rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE) ||
+	    rte_vhost_driver_disable_features(path, SPDK_VHOST_BLK_DISABLED_FEATURES) ||
+	    rte_vhost_driver_get_protocol_features(path, &protocol_features))
+		return -1;
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+	return rte_vhost_driver_set_protocol_features(path, protocol_features);
+}
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..420d48e
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,117 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This function will set vhost user block
+ *
+ * @param path
+ *  socket path
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Install external hook to handle vhost user block message
+ *
+ * @param vid
+ *  vhost device id
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid);
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..8b747d0
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,189 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 07/15] usertools: add support for virtio blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to devbind.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 07/15] usertools: add support for virtio blk device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

The set_vring_state op is mandatory, so add a set_vring_state callback for
the blk device. For now the callback is a stub that does nothing and returns 0.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 14bc5c8..00e7274 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1373,6 +1373,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1390,7 +1400,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Log the virtio blk device config space information
at vDPA launch, before QEMU connects.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 00e7274..ff91e80 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1436,6 +1436,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	__u64 capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1502,6 +1505,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (__u64)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Originally, max_queues was always set to IFCVF_MAX_QUEUES.
Now, for a blk device, max_queues is the minimum of IFCVF_MAX_QUEUES and the
num_queues value read from the hardware config space.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ff91e80..d30c3fd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1530,6 +1530,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 111 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d30c3fd..981cb26 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -617,6 +619,106 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -643,10 +745,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -963,6 +1071,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 981cb26..4eb8f98 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1608,11 +1608,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. last_avail_idx is the lower 16 bits of the ring_state register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4eb8f98..b0b2859 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -356,23 +356,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -759,7 +768,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v2 15/15] vhost: make sure each queue callfd is configured
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-27  7:13       ` Xia, Chenbo
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

During the vhost data path building process, QEMU first creates
a call fd, and then creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, after the kick fd is set, dev_conf
configures the first call fd. Even though the actual call fd is set
later, the data path will not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..b25b25f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR,
 					 "Failed to configure vDPA device\n");
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
  2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
@ 2022-01-27  7:13       ` Xia, Chenbo
  2022-01-29  3:11         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-01-27  7:13 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Andy,

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, January 25, 2022 5:37 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao, Gang
> <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
> 
> During the vhost data path building process, qemu will create
> a call fd at first, and create another call fd in the end.
> The final call fd will be used to relay notify.
> In the original code, after kick fd is set, dev_conf will
> set the first call fd. Even though the actual call fd will set,
> the data path will not work correctly.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 1.8.3.1

Please fix all reported errors on patchwork first.

http://patchwork.dpdk.org/project/dpdk/patch/1643103437-118618-16-git-send-email-andy.pei@intel.com/

Thanks,
Chenbo


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-29  3:03   ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (14 more replies)
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (4 subsequent siblings)
  6 siblings, 15 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the net and blk devices have a lot in common, part of the
existing vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and
specific features and ops are implemented for each.
The vdpa example is extended to support virtio_blk devices.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message.

v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.

Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the
    same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 520 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 +
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 +++++++++
 examples/vdpa/vhost_user.h       | 189 ++++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  14 ++
 usertools/dpdk-devbind.py        |   8 +
 12 files changed, 1034 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22  8:57       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
                       ` (13 subsequent siblings)
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their own features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..96b67dd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and devcic operation. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1177,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1181,6 +1230,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1230,13 +1280,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1248,7 +1309,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1316,6 +1378,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 02/15] vhost: add vdpa ops for blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22  9:12       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

get_config and set_config are necessary ops for a blk device.
Add the get_config and set_config ops to the vdpa ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22  9:58       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 96b67dd..778e1fd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1091,6 +1091,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1203,6 +1207,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1215,7 +1298,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22 10:04       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
                       ` (10 subsequent siblings)
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the blk device, we need to relay all the commands of each queue.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 778e1fd..4f99bb3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22 11:10       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Enable software live migration for the virtio blk device: relay the callfd
and log dirty pages. In this version, write commands are not treated
specially, so their buffers are still marked dirty.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4f99bb3..a930825 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22 11:29       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 07/15] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  14 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to vdpa example.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 ++
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
 examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..924ad7b 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
 	.new_device = new_device,
 	.destroy_device = destroy_device,
+	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+		< 0)
+		rte_exit(EXIT_FAILURE,
+			"set vhost blk driver features and protocol features failed: %s\n",
+			socket_path);
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
         'main.c',
+	'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 0000000..0c4d3ee
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,150 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+/* @file
+ *
+ * Block device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <vdpa_driver.h>
+#include <rte_vhost.h>
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG	24
+#define VHOST_USER_SET_CONFIG	25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Pre-message hook: serve VHOST_USER_GET_CONFIG/SET_CONFIG via vdev->ops.
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+	struct VhostUserMsg *msg = _msg;
+	struct rte_vdpa_device *vdev = NULL;
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL)
+		return RTE_VHOST_MSG_RESULT_ERR;
+	/* NOTE(review): the fprintf() calls below look like debug tracing. */
+	fprintf(stderr, "msg is %d\n", msg->request.master);
+	switch (msg->request.master) {
+	case VHOST_USER_GET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+		/* NOTE(review): no bound check on cfg.size is visible here. */
+		if (vdev->ops->get_config) {
+			fprintf(stdout, "get_config() function is valid!\n");
+			rc = vdev->ops->get_config(vid,
+						   msg->payload.cfg.region,
+						   msg->payload.cfg.size);
+			if (rc != 0) {
+				msg->size = 0;
+				fprintf(stdout, "get_config() return error!\n");
+			}
+		} else {
+			fprintf(stdout, "get_config() function is invalid!\n");
+		}
+		/* A reply is requested even if get_config is missing or failed. */
+		return RTE_VHOST_MSG_RESULT_REPLY;
+	}
+	case VHOST_USER_SET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout,
+			"read message VHOST_USER_SET_CONFIG\n");
+		/* A missing set_config op leaves rc == 0, i.e. reports OK. */
+		if (vdev->ops->set_config) {
+			rc = vdev->ops->set_config(vid,
+				msg->payload.cfg.region,
+				msg->payload.cfg.offset,
+				msg->payload.cfg.size,
+				msg->payload.cfg.flags);
+		}
+
+		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+	}
+	default:
+		break;
+	}
+	/* Every other request type is reported as not handled. */
+	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
+	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
+	.post_msg_handle = NULL, /* no post-message hook */
+};
+
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid)
+{
+	int rc;
+	/* Hook the blk GET/SET_CONFIG pre-message handler into this vid. */
+	rc = rte_vhost_extern_callback_register(vid,
+						&g_blk_extern_vhost_ops,
+						NULL);
+	if (rc != 0) {
+		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
+		return -1; /* registration failed */
+	}
+	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
+	return 0;
+}
+
+/* Set blk virtio and protocol features on the vhost-user socket at path. */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev)
+{
+	uint64_t protocol_features = 0;
+
+	if (!vdev) {
+		fprintf(stdout, "vdev is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* No get_config op (e.g. vdpa net): nothing to set, return success. */
+	if (!vdev->ops->get_config)
+		return 0;
+	/* Apply the base feature mask, then clear the disabled bits. */
+	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
+	rte_vhost_driver_disable_features(path,
+		SPDK_VHOST_BLK_DISABLED_FEATURES);
+	/* Add CONFIG and LOG_SHMFD to the socket's protocol features. */
+	rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+	rte_vhost_driver_set_protocol_features(path, protocol_features);
+	/* NOTE(review): rte_vhost_driver_*() results are not checked. */
+	return 0;
+}
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..420d48e
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,117 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported: cleared after SPDK_VHOST_BLK_FEATURES_BASE is applied */
+#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This function will set vhost user block
+ *
+ * @param path
+ *  socket path
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Install external hook to handle vhost user block message
+ *
+ * @param vid
+ *  vhost device id
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid);
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..8b747d0
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,189 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 07/15] usertools: add support for virtio blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to devbind.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 07/15] usertools: add support for virtio blk device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

The set_vring_state op is mandatory, so add one for the blk device.
For now it is only a stub: it performs no action and returns 0.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a930825..24ae27b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1373,6 +1373,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1390,7 +1400,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Log the virtio blk device config space information
at vDPA launch, before QEMU connects.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 24ae27b..3c4e5f6 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1436,6 +1436,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1502,6 +1505,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Previously, max_queues was always set to IFCVF_MAX_QUEUES.
Now max_queues is the minimum of IFCVF_MAX_QUEUES and the hardware
num_queues value.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3c4e5f6..86dd1c6 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1530,6 +1530,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 111 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 86dd1c6..37fa45e 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -617,6 +619,106 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -643,10 +745,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -963,6 +1071,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 37fa45e..b65e3a3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1608,11 +1608,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause Andy Pei
  2022-01-29  3:03     ` [PATCH v3 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. For the blk device, last_avail_idx is the lower 16 bits of the
   ring_state register.
2. The address of the ring_state register differs between net and blk
   devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

When the virtio blk device is paused, make sure the hardware
last_avail_idx and last_used_idx are the same.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b65e3a3..75dbe63 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -356,23 +356,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -759,7 +768,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v3 15/15] vhost: make sure each queue callfd is configured
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  14 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

While building the vhost data path, QEMU first creates one call fd
and later creates another one.
The final call fd is the one used to relay notifications.
In the original code, dev_conf runs after the kick fd is set and
configures the first call fd. Even though the actual call fd is set
later, the data path does not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..b25b25f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR,
 					 "Failed to configure vDPA device\n");
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
  2022-01-27  7:13       ` Xia, Chenbo
@ 2022-01-29  3:11         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-01-29  3:11 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
I will send out a V3 patch set to address that.


-----Original Message-----
From: Xia, Chenbo <chenbo.xia@intel.com> 
Sent: Thursday, January 27, 2022 3:13 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: RE: [PATCH v2 15/15] vhost: make sure each queue callfd is configured

Hi Andy,

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, January 25, 2022 5:37 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; 
> Cao, Gang <gang.cao@intel.com>; Liu, Changpeng 
> <changpeng.liu@intel.com>
> Subject: [PATCH v2 15/15] vhost: make sure each queue callfd is 
> configured
> 
> During the vhost data path building process, qemu will create a call 
> fd at first, and create another call fd in the end.
> The final call fd will be used to relay notify.
> In the original code, after kick fd is set, dev_conf will set the 
> first call fd. Even though the actual call fd will set, the data path 
> will not work correctly.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 1.8.3.1

Please fix all reported errors on patchwork first.

http://patchwork.dpdk.org/project/dpdk/patch/1643103437-118618-16-git-send-email-andy.pei@intel.com/

Thanks,
Chenbo



^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-03-22  8:57       ` Maxime Coquelin
  0 siblings, 0 replies; 191+ messages in thread
From: Maxime Coquelin @ 2022-03-22  8:57 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu



On 1/29/22 04:03, Andy Pei wrote:
> Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
> Blk and net device are implemented with proper feature and ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
>   2 files changed, 98 insertions(+), 10 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v3 02/15] vhost: add vdpa ops for blk device
  2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
@ 2022-03-22  9:12       ` Maxime Coquelin
  0 siblings, 0 replies; 191+ messages in thread
From: Maxime Coquelin @ 2022-03-22  9:12 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu



On 1/29/22 04:03, Andy Pei wrote:
> Get_config and set_config are necessary ops for blk device.
> Add get_config and set_config ops to vdpa ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   lib/vhost/vdpa_driver.h | 8 ++++++--
>   1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index fc2d6ac..9a23db9 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
>   	/** Reset statistics of the queue */
>   	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
>   
> -	/** Reserved for future extension */
> -	void *reserved[2];
> +	/** Get the device configuration space */
> +	int (*get_config)(int vid, uint8_t *config, uint32_t len);
> +
> +	/** Set the device configuration space */
> +	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> +		      uint32_t size, uint32_t flags);
>   };
>   
>   /**

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-03-22  9:58       ` Maxime Coquelin
  0 siblings, 0 replies; 191+ messages in thread
From: Maxime Coquelin @ 2022-03-22  9:58 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu



On 1/29/22 04:03, Andy Pei wrote:
> For virtio blk device, re-use part of ifc driver ops.
> Implement ifcvf_blk_get_config for virtio blk device.
> Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
> blk device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.h |  4 ++
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 88 insertions(+), 1 deletion(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-03-22 10:04       ` Maxime Coquelin
  2022-03-23  7:07         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Maxime Coquelin @ 2022-03-22 10:04 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu



On 1/29/22 04:03, Andy Pei wrote:
> For the blk we need to relay all the cmd of each queue.

The message is not clear to me, do you mean "For the block device type,
we have to relay the commands on all queues."?

> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 778e1fd..4f99bb3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
>   	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set->start = 0;
>   	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change notification */
>   	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>   		rte_intr_fd_get(internal->pdev->intr_handle);
>   
>   	for (i = 0; i < nr_vring; i++)
>   		internal->intr_fd[i] = -1;
>   
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;


>   			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   		}
>   	}
>   


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-03-22 11:10       ` Maxime Coquelin
  2022-03-23  9:08         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Maxime Coquelin @ 2022-03-22 11:10 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu

Hi Andy,

"vdpa/ifc: add block device SW live-migration"

On 1/29/22 04:03, Andy Pei wrote:
> Enable virtio blk sw live migration relay callfd and log the dirty page.

Please try to make the above sentence simpler. Also, it seems that the
patch below changes behaviour for net devices, so the commit message
should explain that.

> In this version we ignore the write cmd and still mark it dirty.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>   drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
>   3 files changed, 116 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index 721cb1d..3a69e53 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -189,7 +189,7 @@
>   	IFCVF_WRITE_REG32(val >> 32, hi);
>   }
>   
> -STATIC int
> +int
>   ifcvf_hw_enable(struct ifcvf_hw *hw)
>   {
>   	struct ifcvf_pci_common_cfg *cfg;
> @@ -238,7 +238,7 @@
>   	return 0;
>   }
>   
> -STATIC void
> +void
>   ifcvf_hw_disable(struct ifcvf_hw *hw)
>   {
>   	u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>   u64
>   ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
>   
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>   #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 4f99bb3..a930825 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
>   
>   	rte_vhost_get_negotiated_features(vid, &features);
>   	if (RTE_VHOST_NEED_LOG(features)) {
> -		ifcvf_disable_logging(hw);
> -		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> -		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> -				log_base, IFCVF_LOG_BASE, log_size);
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
> +		/* IFCVF marks dirty memory pages for only packet buffer,
> +		 * SW helps to mark the used ring as dirty after device stops.
> +		 */
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +			rte_vhost_log_used_vring(vid, i, 0, len);
> +		}
> +	}
> +}
> +
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
> +{
> +	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
> +	int i, vid;
> +	uint64_t features = 0;
> +	uint64_t log_base = 0, log_size = 0;
> +	uint64_t len;
> +
> +	vid = internal->vid;
> +
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
> +	ifcvf_hw_disable(hw);
> +
> +	for (i = 0; i < hw->nr_vring; i++)
> +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +				hw->vring[i].last_used_idx);
> +
> +	if (internal->sw_lm)
> +		return;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +	if (RTE_VHOST_NEED_LOG(features)) {
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
>   		/*
>   		 * IFCVF marks dirty memory pages for only packet buffer,
>   		 * SW helps to mark the used ring as dirty after device stops.
> @@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
>   		}
>   		hw->vring[i].avail = gpa;
>   
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
>   			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>   			if (gpa == 0) {
>   				DRV_LOG(ERR, "Fail to get GPA for used ring.");
>   				return -1;
>   			}
>   			hw->vring[i].used = gpa;
> -		} else {
> +		} else if (internal->device_type == IFCVF_BLK) {
>   			hw->vring[i].used = m_vring_iova +
>   				(char *)internal->m_vring[i].used -
>   				(char *)internal->m_vring[i].desc;
> @@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
>   	}
>   	hw->nr_vring = nr_vring;
>   
> -	return ifcvf_start_hw(&internal->hw);
> +	if (internal->device_type == IFCVF_NET)
> +		return ifcvf_start_hw(&internal->hw);
> +	else if (internal->device_type == IFCVF_BLK)
> +		return ifcvf_hw_enable(&internal->hw);
>   
>   error:
>   	for (i = 0; i < nr_vring; i++)
> @@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
>   
>   	for (i = 0; i < hw->nr_vring; i++) {
>   		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
> +		     internal->device_type == IFCVF_BLK) {
>   			update_used_ring(internal, i);
> +		}
>   
>   		rte_vhost_get_vhost_vring(vid, i, &vq);
>   		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
>   			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
>   			m_vring_iova, size);
>   
> +		hw->vring[i].last_avail_idx = vq.used->idx;
> +		hw->vring[i].last_used_idx = vq.used->idx;
>   		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
>   				hw->vring[i].last_used_idx);
>   		rte_free(internal->m_vring[i].desc);
> @@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
>   		}
>   	}
>   
> -	for (qid = 0; qid < q_num; qid += 2) {
> -		ev.events = EPOLLIN | EPOLLPRI;
> -		/* leave a flag to mark it's for interrupt */
> -		ev.data.u64 = 1 | qid << 1 |
> -			(uint64_t)internal->intr_fd[qid] << 32;
> -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -				< 0) {
> -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -			return NULL;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (qid = 0; qid < q_num; qid += 2) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (qid = 0; qid < q_num; qid += 1) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
>   		}
> -		update_used_ring(internal, qid);
>   	}
>   
>   	/* start relay with a first kick */
> @@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
>   
>   	/* stop the direct IO data path */
>   	unset_notify_relay(internal);
> -	vdpa_ifcvf_stop(internal);
> +	if (internal->device_type == IFCVF_NET)
> +		vdpa_ifcvf_stop(internal);
> +	else if (internal->device_type == IFCVF_BLK)
> +		vdpa_ifcvf_blk_pause(internal);
>   	vdpa_disable_vfio_intr(internal);
>   
>   	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
  2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-03-22 11:29       ` Maxime Coquelin
  2022-03-23  9:31         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Maxime Coquelin @ 2022-03-22 11:29 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu



On 1/29/22 04:03, Andy Pei wrote:
> Add virtio blk device support to vdpa example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   examples/vdpa/Makefile           |   2 +-
>   examples/vdpa/main.c             |   8 ++
>   examples/vdpa/meson.build        |   1 +
>   examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
>   examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
>   examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
>   6 files changed, 466 insertions(+), 1 deletion(-)
>   create mode 100644 examples/vdpa/vdpa_blk_compact.c
>   create mode 100644 examples/vdpa/vdpa_blk_compact.h
>   create mode 100644 examples/vdpa/vhost_user.h
> 
> diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
> index d974db4..9d0479b 100644
> --- a/examples/vdpa/Makefile
> +++ b/examples/vdpa/Makefile
> @@ -5,7 +5,7 @@
>   APP = vdpa
>   
>   # all source are stored in SRCS-y
> -SRCS-y := main.c
> +SRCS-y := main.c vdpa_blk_compact.c
>   CFLAGS += -DALLOW_EXPERIMENTAL_API
>   
>   PKGCONF ?= pkg-config
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..924ad7b 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>   #include <cmdline_parse_string.h>
>   #include <cmdline_parse_num.h>
>   #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
>   
>   #define MAX_PATH_LEN 128
>   #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -156,6 +157,7 @@ struct vdpa_port {
>   static const struct rte_vhost_device_ops vdpa_sample_devops = {
>   	.new_device = new_device,
>   	.destroy_device = destroy_device,
> +	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
>   };
>   
>   static int
> @@ -192,6 +194,12 @@ struct vdpa_port {
>   			"attach vdpa device failed: %s\n",
>   			socket_path);
>   
> +	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
> +		< 0)
> +		rte_exit(EXIT_FAILURE,
> +			"set vhost blk driver features and protocol features failed: %s\n",
> +			socket_path);
> +

That does not look right: blk device-specific functions should be
called only for block devices.

>   	if (rte_vhost_driver_start(socket_path) < 0)
>   		rte_exit(EXIT_FAILURE,
>   			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
> index bd08605..f0d111c 100644
> --- a/examples/vdpa/meson.build
> +++ b/examples/vdpa/meson.build
> @@ -15,4 +15,5 @@ deps += 'vhost'
>   allow_experimental_apis = true
>   sources = files(
>           'main.c',
> +	'vdpa_blk_compact.c',
>   )
> diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
> new file mode 100644
> index 0000000..0c4d3ee
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.c
> @@ -0,0 +1,150 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +/* @file
> + *
> + * Block device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_malloc.h>
> +#include <vdpa_driver.h>

That's wrong, the application is not supposed to include the driver
APIs.

> +#include <rte_vhost.h>
> +#include "vdpa_blk_compact.h"
> +#include "vhost_user.h"
> +
> +#define VHOST_USER_GET_CONFIG	24
> +#define VHOST_USER_SET_CONFIG	25
> +
> +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> +#define VHOST_USER_PROTOCOL_F_CONFIG   9
> +#endif
> +
> +/*
> + * Function to handle vhost user blk message
> + */
> +static enum rte_vhost_msg_result
> +rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
> +{
> +	struct VhostUserMsg *msg = _msg;
> +	struct rte_vdpa_device *vdev = NULL;
> +
> +	vdev = rte_vhost_get_vdpa_device(vid);
> +	if (vdev == NULL)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	fprintf(stderr, "msg is %d\n", msg->request.master);
> +	switch (msg->request.master) {
> +	case VHOST_USER_GET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
> +
> +		if (vdev->ops->get_config) {
> +			fprintf(stdout, "get_config() function is valid!\n");
> +			rc = vdev->ops->get_config(vid,
> +						   msg->payload.cfg.region,
> +						   msg->payload.cfg.size);
> +			if (rc != 0) {
> +				msg->size = 0;
> +				fprintf(stdout, "get_config() return error!\n");
> +			}
> +		} else {
> +			fprintf(stdout, "get_config() function is invalid!\n");
> +		}
> +
> +		return RTE_VHOST_MSG_RESULT_REPLY;
> +	}
> +	case VHOST_USER_SET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout,
> +			"read message VHOST_USER_SET_CONFIG\n");
> +
> +		if (vdev->ops->set_config) {
> +			rc = vdev->ops->set_config(vid,
> +				msg->payload.cfg.region,
> +				msg->payload.cfg.offset,
> +				msg->payload.cfg.size,
> +				msg->payload.cfg.flags);
> +		}
> +
> +		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
> +	}
> +	default:
> +		break;
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
> +}

I think above message handling should be done in the Vhost library
directly. VHOST_USER_SET_CONFIG and VHOST_USER_GET_CONFIG are not
specific to blk backends, these are generic messages.

> +
> +struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
> +	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
> +	.post_msg_handle = NULL,
> +};
> +
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid)
> +{
> +	int rc;
> +
> +	rc = rte_vhost_extern_callback_register(vid,
> +						&g_blk_extern_vhost_ops,
> +						NULL);
> +	if (rc != 0) {
> +		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
> +		return -1;
> +	}
> +	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
> +	return 0;
> +}
> +
> +
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev)
> +{
> +	uint64_t protocol_features = 0;
> +
> +	if (!vdev) {
> +		fprintf(stdout, "vdev is NULL.\n");
> +		return -EINVAL;
> +	}
> +
> +	/* vdpa net does not have the get_config */
> +	if (!vdev->ops->get_config)
> +		return 0;

That's not good. As I said earlier, the driver's callbacks should not be
visible to the application. Maybe the VDPA API should be extended to
return the device type, I'm not sure, but accessing the driver's ops is
prohibited.

> +	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
> +	rte_vhost_driver_disable_features(path,
> +		SPDK_VHOST_BLK_DISABLED_FEATURES);
> +
> +	rte_vhost_driver_get_protocol_features(path, &protocol_features);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> +	rte_vhost_driver_set_protocol_features(path, protocol_features);
> +
> +	return 0;
> +}
> diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..420d48e
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,117 @@
> +/*    INTEL CONFIDENTIAL

I hope it was not supposed to be confidential :)

> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +/vdpa
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
> +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
> +
> +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
> +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#endif
> +
> +#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \

Why these references to SPDK?

> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> +
> +#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> +
> +#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> +
> +/* Vhost-blk support protocol features */
> +#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * This function will set vhost user block
> + *
> + * @param path
> + *  socket path
> + */
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Install external hook to handle vhost user block message
> + *
> + * @param vid
> + *  vhost device id
> + */
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid);
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
> new file mode 100644
> index 0000000..8b747d0
> --- /dev/null
> +++ b/examples/vdpa/vhost_user.h
> @@ -0,0 +1,189 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "rte_vhost.h"
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
> +
> +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
> +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SET_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_POSTCOPY_LISTEN = 29,
> +	VHOST_USER_POSTCOPY_END = 30,
> +	VHOST_USER_GET_INFLIGHT_FD = 31,
> +	VHOST_USER_SET_INFLIGHT_FD = 32,
> +	VHOST_USER_MAX = 33
> +} VhostUserRequest;
> +
> +typedef enum VhostUserSlaveRequest {
> +	VHOST_USER_SLAVE_NONE = 0,
> +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> +	VHOST_USER_SLAVE_MAX
> +} VhostUserSlaveRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +/* Comply with Cryptodev-Linux */
> +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> +
> +/* Same structure as vhost-user backend session info */
> +typedef struct VhostUserCryptoSessionParam {
> +	int64_t session_id;
> +	uint32_t op_code;
> +	uint32_t cipher_algo;
> +	uint32_t cipher_key_len;
> +	uint32_t hash_algo;
> +	uint32_t digest_len;
> +	uint32_t auth_key_len;
> +	uint32_t aad_len;
> +	uint8_t op_type;
> +	uint8_t dir;
> +	uint8_t hash_mode;
> +	uint8_t chaining_dir;
> +	uint8_t *ciphe_key;
> +	uint8_t *auth_key;
> +	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> +	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> +} VhostUserCryptoSessionParam;
> +
> +typedef struct VhostUserVringArea {
> +	uint64_t u64;
> +	uint64_t size;
> +	uint64_t offset;
> +} VhostUserVringArea;
> +
> +typedef struct VhostUserInflight {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +	uint16_t num_queues;
> +	uint16_t queue_size;
> +} VhostUserInflight;
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
> +typedef struct VhostUserMsg {
> +	union {
> +		uint32_t master; /* a VhostUserRequest value */
> +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> +	} request;
> +
> +#define VHOST_USER_VERSION_MASK     0x3
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   0xff
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +		VhostUserLog    log;
> +		struct vhost_iotlb_msg iotlb;
> +		VhostUserCryptoSessionParam crypto_session;
> +		VhostUserVringArea area;
> +		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +	int fd_num;
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    0x1
> +#endif


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-22 10:04       ` Maxime Coquelin
@ 2022-03-23  7:07         ` Pei, Andy
  2022-03-23  7:42           ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Pei, Andy @ 2022-03-23  7:07 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng

Hi Maxime,

Thanks for your reply and my reply is inline.

-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 6:05 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device



On 1/29/22 04:03, Andy Pei wrote:
> For the blk we need to relay all the cmd of each queue.

The message is not clear to me, do you mean "For the block device type, we have to relay the commands on all queues."?
Andy: Yes. A BLK device can work with a single queue, whereas a NET device uses queue pairs.

> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c 
> b/drivers/vdpa/ifc/ifcvf_vdpa.c index 778e1fd..4f99bb3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
>   	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set->start = 0;
>   	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change 
> +notification */
>   	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>   		rte_intr_fd_get(internal->pdev->intr_handle);
>   
>   	for (i = 0; i < nr_vring; i++)
>   		internal->intr_fd[i] = -1;
>   
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;


>   			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   		}
>   	}
>   


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-23  7:07         ` Pei, Andy
@ 2022-03-23  7:42           ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-03-23  7:42 UTC (permalink / raw)
  To: Pei, Andy, Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng

Hi Maxime,

I think it is better to change the commit log to your description:
"For the block device type, we have to relay the commands on all queues."
I will do this in the next version of the patch set.

-----Original Message-----
From: Pei, Andy <andy.pei@intel.com> 
Sent: Wednesday, March 23, 2022 3:08 PM
To: Maxime Coquelin <maxime.coquelin@redhat.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device

Hi Maxime,

Thanks for your reply and my reply is inline.

-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 6:05 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device



On 1/29/22 04:03, Andy Pei wrote:
> For the blk we need to relay all the cmd of each queue.

The message is not clear to me, do you mean "For the block device type, we have to relay the commands on all queues."?
Andy: Yes. A BLK device can work with a single queue, whereas a NET device uses queue pairs.

> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c 
> b/drivers/vdpa/ifc/ifcvf_vdpa.c index 778e1fd..4f99bb3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
>   	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set->start = 0;
>   	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change 
> +notification */
>   	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>   		rte_intr_fd_get(internal->pdev->intr_handle);
>   
>   	for (i = 0; i < nr_vring; i++)
>   		internal->intr_fd[i] = -1;
>   
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;


>   			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   		}
>   	}
>   


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
  2022-03-22 11:10       ` Maxime Coquelin
@ 2022-03-23  9:08         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-03-23  9:08 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng

Hi Maxime,

Thanks for your reply; my reply is inline.

-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 7:10 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration

Hi Andy,

"vdpa/ifc: add block device SW live-migration"

On 1/29/22 04:03, Andy Pei wrote:
> Enable virtio blk sw live migration relay callfd and log the dirty page.

Please try to make the above sentence simpler. Also, it seems that below patch changes behaviour for net devices, so the commit message should explain that.

Andy: Sure, I think it is better to send out a new patch set,
using a simpler commit log and reworking the code to make sure it does not change the behavior of the net device.

> In this version we ignore the write cmd and still mark it dirty.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>   drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
>   3 files changed, 116 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c 
> b/drivers/vdpa/ifc/base/ifcvf.c index 721cb1d..3a69e53 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -189,7 +189,7 @@
>   	IFCVF_WRITE_REG32(val >> 32, hi);
>   }
>   
> -STATIC int
> +int
>   ifcvf_hw_enable(struct ifcvf_hw *hw)
>   {
>   	struct ifcvf_pci_common_cfg *cfg;
> @@ -238,7 +238,7 @@
>   	return 0;
>   }
>   
> -STATIC void
> +void
>   ifcvf_hw_disable(struct ifcvf_hw *hw)
>   {
>   	u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h 
> b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>   u64
>   ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
>   
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>   #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c 
> b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4f99bb3..a930825 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
>   
>   	rte_vhost_get_negotiated_features(vid, &features);
>   	if (RTE_VHOST_NEED_LOG(features)) {
> -		ifcvf_disable_logging(hw);
> -		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> -		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> -				log_base, IFCVF_LOG_BASE, log_size);
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
> +		/* IFCVF marks dirty memory pages for only packet buffer,
> +		 * SW helps to mark the used ring as dirty after device stops.
> +		 */
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +			rte_vhost_log_used_vring(vid, i, 0, len);
> +		}
> +	}
> +}
> +
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) {
> +	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
> +	int i, vid;
> +	uint64_t features = 0;
> +	uint64_t log_base = 0, log_size = 0;
> +	uint64_t len;
> +
> +	vid = internal->vid;
> +
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
> +	ifcvf_hw_disable(hw);
> +
> +	for (i = 0; i < hw->nr_vring; i++)
> +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +				hw->vring[i].last_used_idx);
> +
> +	if (internal->sw_lm)
> +		return;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +	if (RTE_VHOST_NEED_LOG(features)) {
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
>   		/*
>   		 * IFCVF marks dirty memory pages for only packet buffer,
>   		 * SW helps to mark the used ring as dirty after device stops.
> @@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
>   		}
>   		hw->vring[i].avail = gpa;
>   
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
>   			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>   			if (gpa == 0) {
>   				DRV_LOG(ERR, "Fail to get GPA for used ring.");
>   				return -1;
>   			}
>   			hw->vring[i].used = gpa;
> -		} else {
> +		} else if (internal->device_type == IFCVF_BLK) {
>   			hw->vring[i].used = m_vring_iova +
>   				(char *)internal->m_vring[i].used -
>   				(char *)internal->m_vring[i].desc; @@ -688,7 +747,10 @@ struct 
> rte_vdpa_dev_info {
>   	}
>   	hw->nr_vring = nr_vring;
>   
> -	return ifcvf_start_hw(&internal->hw);
> +	if (internal->device_type == IFCVF_NET)
> +		return ifcvf_start_hw(&internal->hw);
> +	else if (internal->device_type == IFCVF_BLK)
> +		return ifcvf_hw_enable(&internal->hw);
>   
>   error:
>   	for (i = 0; i < nr_vring; i++)
> @@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
>   
>   	for (i = 0; i < hw->nr_vring; i++) {
>   		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
> +		     internal->device_type == IFCVF_BLK) {
>   			update_used_ring(internal, i);
> +		}
>   
>   		rte_vhost_get_vhost_vring(vid, i, &vq);
>   		len = IFCVF_USED_RING_LEN(vq.size); @@ -726,6 +790,8 @@ struct 
> rte_vdpa_dev_info {
>   			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
>   			m_vring_iova, size);
>   
> +		hw->vring[i].last_avail_idx = vq.used->idx;
> +		hw->vring[i].last_used_idx = vq.used->idx;
>   		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
>   				hw->vring[i].last_used_idx);
>   		rte_free(internal->m_vring[i].desc);
> @@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
>   		}
>   	}
>   
> -	for (qid = 0; qid < q_num; qid += 2) {
> -		ev.events = EPOLLIN | EPOLLPRI;
> -		/* leave a flag to mark it's for interrupt */
> -		ev.data.u64 = 1 | qid << 1 |
> -			(uint64_t)internal->intr_fd[qid] << 32;
> -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -				< 0) {
> -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -			return NULL;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (qid = 0; qid < q_num; qid += 2) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (qid = 0; qid < q_num; qid += 1) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
>   		}
> -		update_used_ring(internal, qid);
>   	}
>   
>   	/* start relay with a first kick */ @@ -874,7 +959,10 @@ struct 
> rte_vdpa_dev_info {
>   
>   	/* stop the direct IO data path */
>   	unset_notify_relay(internal);
> -	vdpa_ifcvf_stop(internal);
> +	if (internal->device_type == IFCVF_NET)
> +		vdpa_ifcvf_stop(internal);
> +	else if (internal->device_type == IFCVF_BLK)
> +		vdpa_ifcvf_blk_pause(internal);
>   	vdpa_disable_vfio_intr(internal);
>   
>   	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, 
> false);


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
  2022-03-22 11:29       ` Maxime Coquelin
@ 2022-03-23  9:31         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-03-23  9:31 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng

Hi Maxime,

It seems there are a lot of problems with the example.
I think I will re-work the example according to your comments.
Thanks for your comments.

-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 7:30 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example



On 1/29/22 04:03, Andy Pei wrote:
> Add virtio blk device support to vdpa example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   examples/vdpa/Makefile           |   2 +-
>   examples/vdpa/main.c             |   8 ++
>   examples/vdpa/meson.build        |   1 +
>   examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
>   examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
>   examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
>   6 files changed, 466 insertions(+), 1 deletion(-)
>   create mode 100644 examples/vdpa/vdpa_blk_compact.c
>   create mode 100644 examples/vdpa/vdpa_blk_compact.h
>   create mode 100644 examples/vdpa/vhost_user.h
> 
> diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile index 
> d974db4..9d0479b 100644
> --- a/examples/vdpa/Makefile
> +++ b/examples/vdpa/Makefile
> @@ -5,7 +5,7 @@
>   APP = vdpa
>   
>   # all source are stored in SRCS-y
> -SRCS-y := main.c
> +SRCS-y := main.c vdpa_blk_compact.c
>   CFLAGS += -DALLOW_EXPERIMENTAL_API
>   
>   PKGCONF ?= pkg-config
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index 
> 5ab0765..924ad7b 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>   #include <cmdline_parse_string.h>
>   #include <cmdline_parse_num.h>
>   #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
>   
>   #define MAX_PATH_LEN 128
>   #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -156,6 +157,7 @@ struct vdpa_port {
>   static const struct rte_vhost_device_ops vdpa_sample_devops = {
>   	.new_device = new_device,
>   	.destroy_device = destroy_device,
> +	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
>   };
>   
>   static int
> @@ -192,6 +194,12 @@ struct vdpa_port {
>   			"attach vdpa device failed: %s\n",
>   			socket_path);
>   
> +	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
> +		< 0)
> +		rte_exit(EXIT_FAILURE,
> +			"set vhost blk driver features and protocol features failed: %s\n",
> +			socket_path);
> +

That does not look right; blk device-specific functions should be called only for block devices.

>   	if (rte_vhost_driver_start(socket_path) < 0)
>   		rte_exit(EXIT_FAILURE,
>   			"start vhost driver failed: %s\n", diff --git 
> a/examples/vdpa/meson.build b/examples/vdpa/meson.build index 
> bd08605..f0d111c 100644
> --- a/examples/vdpa/meson.build
> +++ b/examples/vdpa/meson.build
> @@ -15,4 +15,5 @@ deps += 'vhost'
>   allow_experimental_apis = true
>   sources = files(
>           'main.c',
> +	'vdpa_blk_compact.c',
>   )
> diff --git a/examples/vdpa/vdpa_blk_compact.c 
> b/examples/vdpa/vdpa_blk_compact.c
> new file mode 100644
> index 0000000..0c4d3ee
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.c
> @@ -0,0 +1,150 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +/* @file
> + *
> + * Block device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_malloc.h>
> +#include <vdpa_driver.h>

That's wrong, the application is not supposed to include the driver APIs.

> +#include <rte_vhost.h>
> +#include "vdpa_blk_compact.h"
> +#include "vhost_user.h"
> +
> +#define VHOST_USER_GET_CONFIG	24
> +#define VHOST_USER_SET_CONFIG	25
> +
> +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> +#define VHOST_USER_PROTOCOL_F_CONFIG   9
> +#endif
> +
> +/*
> + * Function to handle vhost user blk message  */ static enum 
> +rte_vhost_msg_result rte_vhost_blk_extern_vhost_pre_msg_handler(int 
> +vid, void *_msg) {
> +	struct VhostUserMsg *msg = _msg;
> +	struct rte_vdpa_device *vdev = NULL;
> +
> +	vdev = rte_vhost_get_vdpa_device(vid);
> +	if (vdev == NULL)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	fprintf(stderr, "msg is %d\n", msg->request.master);
> +	switch (msg->request.master) {
> +	case VHOST_USER_GET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
> +
> +		if (vdev->ops->get_config) {
> +			fprintf(stdout, "get_config() function is valid!\n");
> +			rc = vdev->ops->get_config(vid,
> +						   msg->payload.cfg.region,
> +						   msg->payload.cfg.size);
> +			if (rc != 0) {
> +				msg->size = 0;
> +				fprintf(stdout, "get_config() return error!\n");
> +			}
> +		} else {
> +			fprintf(stdout, "get_config() function is invalid!\n");
> +		}
> +
> +		return RTE_VHOST_MSG_RESULT_REPLY;
> +	}
> +	case VHOST_USER_SET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout,
> +			"read message VHOST_USER_SET_CONFIG\n");
> +
> +		if (vdev->ops->set_config) {
> +			rc = vdev->ops->set_config(vid,
> +				msg->payload.cfg.region,
> +				msg->payload.cfg.offset,
> +				msg->payload.cfg.size,
> +				msg->payload.cfg.flags);
> +		}
> +
> +		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
> +	}
> +	default:
> +		break;
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_NOT_HANDLED; }

I think above message handling should be done in the Vhost library directly. VHOST_USER_SET_CONFIG and VHOST_USER_GET_CONFIG are not specific to blk backends, these are generic messages.

> +
> +struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
> +	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
> +	.post_msg_handle = NULL,
> +};
> +
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid) {
> +	int rc;
> +
> +	rc = rte_vhost_extern_callback_register(vid,
> +						&g_blk_extern_vhost_ops,
> +						NULL);
> +	if (rc != 0) {
> +		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
> +		return -1;
> +	}
> +	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
> +	return 0;
> +}
> +
> +
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev)
> +{
> +	uint64_t protocol_features = 0;
> +
> +	if (!vdev) {
> +		fprintf(stdout, "vdev is NULL.\n");
> +		return -EINVAL;
> +	}
> +
> +	/* vdpa net does not have the get_config */
> +	if (!vdev->ops->get_config)
> +		return 0;

That's not good, as I said earlier, the drivers callback should not be visible to the application. Maybe the VDPA API should be extended to return the device type, I'm not sure, but accessing the drivers ops is prohibited.

> +	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
> +	rte_vhost_driver_disable_features(path,
> +		SPDK_VHOST_BLK_DISABLED_FEATURES);
> +
> +	rte_vhost_driver_get_protocol_features(path, &protocol_features);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> +	rte_vhost_driver_set_protocol_features(path, protocol_features);
> +
> +	return 0;
> +}
> diff --git a/examples/vdpa/vdpa_blk_compact.h 
> b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..420d48e
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,117 @@
> +/*    INTEL CONFIDENTIAL

I hope it was not supposed to be confidential :)

> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +/vdpa
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
> +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
> +
> +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ #define 
> +VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH #endif /* !VIRTIO_BLK_NO_LEGACY 
> +*/
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES #define 
> +VHOST_USER_F_PROTOCOL_FEATURES 30 #endif
> +
> +#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \

Why these references to SPDK?

> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> +
> +#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> +
> +#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> +
> +/* Vhost-blk support protocol features */ #define 
> +SPDK_VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * This function will set vhost user block
> + *
> + * @param path
> + *  socket path
> + */
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Install external hook to handle vhost user block message
> + *
> + * @param vid
> + *  vhost device id
> + */
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid);
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h 
> new file mode 100644 index 0000000..8b747d0
> --- /dev/null
> +++ b/examples/vdpa/vhost_user.h
> @@ -0,0 +1,189 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "rte_vhost.h"
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
> +
> +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
> +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SET_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_POSTCOPY_LISTEN = 29,
> +	VHOST_USER_POSTCOPY_END = 30,
> +	VHOST_USER_GET_INFLIGHT_FD = 31,
> +	VHOST_USER_SET_INFLIGHT_FD = 32,
> +	VHOST_USER_MAX = 33
> +} VhostUserRequest;
> +
> +typedef enum VhostUserSlaveRequest {
> +	VHOST_USER_SLAVE_NONE = 0,
> +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> +	VHOST_USER_SLAVE_MAX
> +} VhostUserSlaveRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +/* Comply with Cryptodev-Linux */
> +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> +
> +/* Same structure as vhost-user backend session info */ typedef 
> +struct VhostUserCryptoSessionParam {
> +	int64_t session_id;
> +	uint32_t op_code;
> +	uint32_t cipher_algo;
> +	uint32_t cipher_key_len;
> +	uint32_t hash_algo;
> +	uint32_t digest_len;
> +	uint32_t auth_key_len;
> +	uint32_t aad_len;
> +	uint8_t op_type;
> +	uint8_t dir;
> +	uint8_t hash_mode;
> +	uint8_t chaining_dir;
> +	uint8_t *ciphe_key;
> +	uint8_t *auth_key;
> +	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> +	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> +} VhostUserCryptoSessionParam;
> +
> +typedef struct VhostUserVringArea {
> +	uint64_t u64;
> +	uint64_t size;
> +	uint64_t offset;
> +} VhostUserVringArea;
> +
> +typedef struct VhostUserInflight {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +	uint16_t num_queues;
> +	uint16_t queue_size;
> +} VhostUserInflight;
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
> +typedef struct VhostUserMsg {
> +	union {
> +		uint32_t master; /* a VhostUserRequest value */
> +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> +	} request;
> +
> +#define VHOST_USER_VERSION_MASK     0x3
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   0xff
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +		VhostUserLog    log;
> +		struct vhost_iotlb_msg iotlb;
> +		VhostUserCryptoSessionParam crypto_session;
> +		VhostUserVringArea area;
> +		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +	int fd_num;
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    0x1
> +#endif


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-27 14:51   ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (15 more replies)
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (3 subsequent siblings)
  6 siblings, 16 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc
driver is re-used. Virtio net and blk devices are distinguished by device
id, and device-specific features and ops are implemented for each.
An example is added to vdpa to support virtio_blk devices.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only under the VHOST_USER_SET_VRING_CALL msg.

v4:
 Add the "isblk" argument to the vdpa example to specify a block device,
 and fix some issues in the example.
 Make sure code specific to block devices does not affect net devices.
v3:
 Fix some compile issues.
v2:
  Fix some coding style issues.

Andy Pei (16):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA lauch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 523 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  61 ++++-
 examples/vdpa/vdpa_blk_compact.h |  72 ++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  65 +++++
 lib/vhost/vhost_user.h           |  15 ++
 usertools/dpdk-devbind.py        |   8 +
 10 files changed, 937 insertions(+), 55 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 02/16] vhost: add vdpa ops for " Andy Pei
                       ` (14 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their proper features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 02/16] vhost: add vdpa ops for blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
                       ` (13 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-03-27 14:51     ` [PATCH v4 02/16] vhost: add vdpa ops for " Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 15 +++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..b11fafd 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR, "get_config() return error!\n");
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "get_config() not supportted!\n");
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "set_config() not supportted!\n");
+	}
+
+	return ret == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..d3f014e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,18 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +162,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (10 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the block device type, we have to relay
the commands on all queues.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..8d104b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 07/16] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add SW live-migration support to block device.
Add dirty page logging to block device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 07/16] example/vdpa:add vdpa blk support in example
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to vdpa example.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             |  61 +++++++++++++-
 examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..1c809ab 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -41,6 +42,7 @@ struct vdpa_port {
 static int devcnt;
 static int interactive;
 static int client_mode;
+static int isblk;
 
 /* display usage */
 static void
@@ -49,7 +51,8 @@ struct vdpa_port {
 	printf("Usage: %s [EAL options] -- "
 				 "	--interactive|-i: run in interactive mode.\n"
 				 "	--iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
-				 "	--client: register a vhost-user socket as client mode.\n",
+				 "	--client: register a vhost-user socket as client mode.\n"
+				 "	--isblk: device is a block device, e.g. virtio_blk device.\n",
 				 prgname);
 }
 
@@ -61,6 +64,7 @@ struct vdpa_port {
 		{"iface", required_argument, NULL, 0},
 		{"interactive", no_argument, &interactive, 1},
 		{"client", no_argument, &client_mode, 1},
+		{"isblk", no_argument, &isblk, 1},
 		{NULL, 0, 0, 0},
 	};
 	int opt, idx;
@@ -159,6 +163,52 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_VDPA_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
 	int ret;
@@ -192,6 +242,15 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (isblk) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..e7c0f22
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..137bbc2
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 08/16] usertools: add support for virtio blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 07/16] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 20:01       ` Stephen Hemminger
  2022-03-27 14:51     ` [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  15 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a23dc2d..28191e4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1368,6 +1368,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1385,7 +1395,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 28191e4..9bc2f47 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1431,6 +1431,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	__u64 capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1497,6 +1500,32 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/**
+		** cannot read 64-bit register in one attempt,
+		** so read byte by byte.
+		**/
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (__u64)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Previously, max_queues was always set to IFCVF_MAX_QUEUES.
Now, for blk devices, max_queues is the minimum of IFCVF_MAX_QUEUES and the
num_queues value reported by the hardware.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9bc2f47..20a0b01 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1526,6 +1526,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 113 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 20a0b01..826b408 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -608,6 +610,108 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+
+	return;
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -634,10 +738,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -958,6 +1068,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 826b408..95538c1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1605,11 +1605,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-03-27 14:51     ` [PATCH v4 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. For blk devices, last_avail_idx is the lower 16 bits of the ring_state
   register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index e417c50..d923266 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d923266..d89cb73 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -118,7 +118,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 95538c1..36fd850 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -351,23 +351,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -752,7 +761,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v4 16/16] vhost: make sure each queue callfd is configured
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

During the vhost data path building process, qemu creates a call fd
at first, and creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, after the kick fd is set, dev_conf configures
the device with the first call fd. Even though the actual call fd is
set later, the data path does not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index b11fafd..8c5904f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3197,12 +3197,27 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/**
+		** VIRTIO_DEV_VDPA_CONFIGURED is already set:
+		** close the device and configure it again to
+		** make sure the call fd of each queue is configured to hardware.
+		**/
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v4 08/16] usertools: add support for virtio blk device
  2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-03-27 20:01       ` Stephen Hemminger
  0 siblings, 0 replies; 191+ messages in thread
From: Stephen Hemminger @ 2022-03-27 20:01 UTC (permalink / raw)
  To: Andy Pei; +Cc: dev, chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

On Sun, 27 Mar 2022 22:51:31 +0800
Andy Pei <andy.pei@intel.com> wrote:

> Signed-off-by: Andy Pei <andy.pei@intel.com>

Shouldn't we just recommend driverctl instead?

I had patches for devbind to use vmbus rejected because of that.



^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (2 preceding siblings ...)
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-28  7:17   ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (15 more replies)
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (2 subsequent siblings)
  6 siblings, 16 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the devices share a lot of similarities, part of the vdpa/ifc
driver is re-used. The virtio net and blk devices are distinguished by
device id, and device-specific features and ops are implemented.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg.

v5:
 Fix some coding style issues.
v4:
 Add the "isblk" argument to the vdpa example to specify a block device,
 and fix some issues in the example.
 Make sure code specific to the block device does not affect the net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.

Andy Pei (16):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 521 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  61 ++++-
 examples/vdpa/vdpa_blk_compact.h |  72 ++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  64 +++++
 lib/vhost/vhost_user.h           |  15 ++
 usertools/dpdk-devbind.py        |   8 +
 10 files changed, 934 insertions(+), 55 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 02/16] vhost: add vdpa ops for " Andy Pei
                       ` (14 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their proper features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 02/16] vhost: add vdpa ops for blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
                       ` (13 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-03-28  7:17     ` [PATCH v5 02/16] vhost: add vdpa ops for " Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-04-20 13:53       ` Xia, Chenbo
  2022-03-28  7:17     ` [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  15 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk VDPA devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 15 +++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..55e8bd0 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR, "get_config() return error!\n");
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "get_config() not supported!\n");
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "set_config() not supported!\n");
+	}
+
+	return ret == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..d3f014e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,18 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +162,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (10 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the block device type, we have to relay
the commands on all queues.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..8d104b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 07/16] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add SW live-migration support to block device.
Add dirty page logging to block device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 07/16] example/vdpa:add vdpa blk support in example
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 08/16] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to vdpa example.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             |  61 +++++++++++++-
 examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..1c809ab 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -41,6 +42,7 @@ struct vdpa_port {
 static int devcnt;
 static int interactive;
 static int client_mode;
+static int isblk;
 
 /* display usage */
 static void
@@ -49,7 +51,8 @@ struct vdpa_port {
 	printf("Usage: %s [EAL options] -- "
 				 "	--interactive|-i: run in interactive mode.\n"
 				 "	--iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
-				 "	--client: register a vhost-user socket as client mode.\n",
+				 "	--client: register a vhost-user socket as client mode.\n"
+				 "	--isblk: device is a block device, e.g. virtio_blk device.\n",
 				 prgname);
 }
 
@@ -61,6 +64,7 @@ struct vdpa_port {
 		{"iface", required_argument, NULL, 0},
 		{"interactive", no_argument, &interactive, 1},
 		{"client", no_argument, &client_mode, 1},
+		{"isblk", no_argument, &isblk, 1},
 		{NULL, 0, 0, 0},
 	};
 	int opt, idx;
@@ -159,6 +163,52 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_VDPA_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
 	int ret;
@@ -192,6 +242,15 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (isblk) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..e7c0f22
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..137bbc2
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 08/16] usertools: add support for virtio blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 07/16] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to devbind.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

The set_vring_state op is mandatory, so add a set_vring_state callback
for the blk device. For now the callback is a stub: it ignores its
arguments and returns 0.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a23dc2d..28191e4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1368,6 +1368,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1385,7 +1395,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Log the virtio blk device config space information
at vDPA launch, before QEMU connects.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 28191e4..045623b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1431,6 +1431,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1497,6 +1500,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Previously, max_queues was always set to IFCVF_MAX_QUEUES.
Now, for blk devices, max_queues is the minimum of IFCVF_MAX_QUEUES and
the num_queues value read from the hardware config space.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 045623b..e8e7d61 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1525,6 +1525,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e8e7d61..c02ae4d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -608,6 +610,107 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -634,10 +737,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -958,6 +1067,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c02ae4d..f54beaf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1604,11 +1604,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-03-28  7:17     ` [PATCH v5 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index e417c50..d923266 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d923266..d89cb73 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -118,7 +118,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f54beaf..578bf6c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -351,23 +351,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -751,7 +760,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v5 16/16] vhost: make sure each queue callfd is configured
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

During the vhost data path building process, QEMU creates
a call fd at first, and creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, after the kick fd is set, dev_conf
configures the first call fd. Even though the actual call fd is set
later, the data path does not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 55e8bd0..43154c0 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3197,12 +3197,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
@ 2022-04-20 13:53       ` Xia, Chenbo
  2022-04-21  8:05         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-20 13:53 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Andy,

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, March 28, 2022 3:17 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG

Let's make the title a bit short...

./devtools/check-git-log.sh will help you find other similar errors for other
patches.

> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 50
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 15 +++++++++++++++
>  2 files changed, 65 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..55e8bd0 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>  	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>  	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR, "get_config() return error!\n");
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "get_config() not supported!\n");
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "set_config() not supported!\n");
> +	}
> +
> +	return ret == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;

I think when set_config fails in vdpa driver, it should not break message handler by
returning RESULT_ERR here.

All error log above, please print dev->ifname too, which will be user-friendly.

> +}
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
>  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
>  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
>  	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
>  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
>  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index c946cc2..d3f014e 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -125,6 +127,18 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif

For this config size, maybe '+#define VHOST_USER_MAX_CONFIG_SIZE 256' is enough?

> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};

Since the max size is defined, in the handler, we should check the size in the msg
handler.

Thanks,
Chenbo

> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -148,6 +162,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-04-20 13:53       ` Xia, Chenbo
@ 2022-04-21  8:05         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-21  8:05 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
My reply is inline.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Wednesday, April 20, 2022 9:53 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v5 03/16] vhost: add support for
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
>
> Hi Andy,
>
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Monday, March 28, 2022 3:17 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v5 03/16] vhost: add support for
> VHOST_USER_GET_CONFIG
> > and VHOST_USER_SET_CONFIG
>
> Let's make the title a bit short...
>
> ./devtools/check-git-log.sh will help you find other similar errors for other
> patches.
>
OK, I will send out V6 to fix commit log title.
Thanks for your suggestion.
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 50
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 15 +++++++++++++++
> >  2 files changed, 65 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..55e8bd0 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >     [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >     [VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >     [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +   [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +   [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >     [VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >     [VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >     [VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE", @@
> > -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +                   struct vhu_msg_context *ctx,
> > +                   int main_fd __rte_unused)
> > +{
> > +   struct virtio_net *dev = *pdev;
> > +   struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +   int ret = 0;
> > +
> > +   if (vdpa_dev->ops->get_config) {
> > +           ret = vdpa_dev->ops->get_config(dev->vid,
> > +                                      ctx->msg.payload.cfg.region,
> > +                                      ctx->msg.payload.cfg.size);
> > +           if (ret != 0) {
> > +                   ctx->msg.size = 0;
> > +                   VHOST_LOG_CONFIG(ERR, "get_config() return
> error!\n");
> > +           }
> > +   } else {
> > +           VHOST_LOG_CONFIG(ERR, "get_config() not supported!\n");
> > +   }
> > +
> > +   return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +                   struct vhu_msg_context *ctx,
> > +                   int main_fd __rte_unused)
> > +{
> > +   struct virtio_net *dev = *pdev;
> > +   struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +   int ret = 0;
> > +
> > +   if (vdpa_dev->ops->set_config) {
> > +           ret = vdpa_dev->ops->set_config(dev->vid,
> > +                   ctx->msg.payload.cfg.region,
> > +                   ctx->msg.payload.cfg.offset,
> > +                   ctx->msg.payload.cfg.size,
> > +                   ctx->msg.payload.cfg.flags);
> > +   } else {
> > +           VHOST_LOG_CONFIG(ERR, "set_config() not supported!\n");
> > +   }
> > +
> > +   return ret == 0 ? RTE_VHOST_MSG_RESULT_OK :
> > +RTE_VHOST_MSG_RESULT_ERR;
>
> I think when set_config fails in vdpa driver, it should not break message
> handler by returning RESULT_ERR here.
>
I will return RTE_VHOST_MSG_RESULT_OK and output some log.

> All error log above, please print dev->ifname too, which will be user-friendly.
>
Sure. Thanks.
> > +}
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >                     struct vhu_msg_context *ctx,
> >                     int main_fd __rte_unused)
> > @@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct
> > virtio_net **pdev,
> >     [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> >     [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> >     [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > +   [VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > +   [VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> >     [VHOST_USER_POSTCOPY_ADVISE] =
> vhost_user_set_postcopy_advise,
> >     [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> >     [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> git
> > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > c946cc2..d3f014e 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >     VHOST_USER_NET_SET_MTU = 20,
> >     VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >     VHOST_USER_IOTLB_MSG = 22,
> > +   VHOST_USER_GET_CONFIG = 24,
> > +   VHOST_USER_SET_CONFIG = 25,
> >     VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >     VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >     VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -125,6 +127,18 @@
> >     uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> > +#define VHOST_USER_MAX_CONFIG_SIZE         256
> > +#endif
>
> For this config size, maybe '+#define VHOST_USER_MAX_CONFIG_SIZE 256' is
> enough?
>
Sure.
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +   uint32_t offset;
> > +   uint32_t size;
> > +   uint32_t flags;
> > +   uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
>
> Since the max size is defined, in the handler, we should check the size in the
> msg handler.
Sure.
>
> Thanks,
> Chenbo
>
> > +
> >  typedef struct VhostUserMsg {
> >     union {
> >             uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> +162,7 @@
> >             VhostUserCryptoSessionParam crypto_session;
> >             VhostUserVringArea area;
> >             VhostUserInflight inflight;
> > +           struct vhost_user_config cfg;
> >     } payload;
> >     /* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
>


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (3 preceding siblings ...)
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-21  8:33   ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (15 more replies)
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  6 siblings, 16 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Since there are a lot of similarities, part of the vdpa/ifc driver is
re-used.
The virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to
the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg.

v6:
 fix some commit log.
 add vhost socket in log output to make it more user-friendly.
 when driver ops fail, just output some log, do not break message handler.
 check vhost msg size in msg handler.
v5:
 fix some coding style issues.
v4:
 add arg "isblk" to vdpa example to specify a block device, fix some
 issues in example.
 Make sure code specific to block device does not affect net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.

Andy Pei (16):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  examples/vdpa: add vDPA blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: add set vring state for blk device
  vdpa/ifc: add some log at vDPA launch before qemu connect
  vdpa/ifc: read virtio max queues from hardware
  vdpa/ifc: add interrupt and handle for virtio blk
  vdpa/ifc: add is blk flag to ifcvf HW struct
  vdpa/ifc/base: access correct register for blk device
  vdpa/ifc: blk device pause without no inflight IO
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 521 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  61 ++++-
 examples/vdpa/vdpa_blk_compact.h |  72 ++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  83 +++++++
 lib/vhost/vhost_user.h           |  13 +
 usertools/dpdk-devbind.py        |   8 +
 10 files changed, 951 insertions(+), 55 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 02/16] vhost: add vDPA ops for " Andy Pei
                       ` (14 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their proper features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 02/16] vhost: add vDPA ops for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
                       ` (13 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

The get_config and set_config ops are necessary for a blk device.
Add the get_config and set_config ops to the vDPA ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-04-21  8:33     ` [PATCH v6 02/16] vhost: add vDPA ops for " Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 12:42       ` Xia, Chenbo
  2022-04-25 13:04       ` David Marchand
  2022-04-21  8:33     ` [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  15 siblings, 2 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add support for the VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
messages. The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages
are only supported by virtio blk vDPA devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 ++++++++++
 2 files changed, 82 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..3780804 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) invalid set config msg size: %"PRId32" != %d\n",
+			dev->ifname, ctx->msg.size,
+			(int)sizeof(struct vhost_user_config));
+		goto OUT;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supportted!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+OUT:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..97cfb2f 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +160,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the virtio blk device, re-use part of the ifc driver ops.
Implement ifcvf_blk_get_config for the virtio blk device.
Support the VHOST_USER_PROTOCOL_F_CONFIG feature for the virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 12:58       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (10 subsequent siblings)
  15 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the block device type, we have to relay
the commands on all queues.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..8d104b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 13:10       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
                       ` (9 subsequent siblings)
  15 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add SW live-migration support for the block device.
Add dirty page logging for the block device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 13:38       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  15 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to vDPA example.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             |  61 +++++++++++++-
 examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..1c809ab 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -41,6 +42,7 @@ struct vdpa_port {
 static int devcnt;
 static int interactive;
 static int client_mode;
+static int isblk;
 
 /* display usage */
 static void
@@ -49,7 +51,8 @@ struct vdpa_port {
 	printf("Usage: %s [EAL options] -- "
 				 "	--interactive|-i: run in interactive mode.\n"
 				 "	--iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
-				 "	--client: register a vhost-user socket as client mode.\n",
+				 "	--client: register a vhost-user socket as client mode.\n"
+				 "	--isblk: device is a block device, e.g. virtio_blk device.\n",
 				 prgname);
 }
 
@@ -61,6 +64,7 @@ struct vdpa_port {
 		{"iface", required_argument, NULL, 0},
 		{"interactive", no_argument, &interactive, 1},
 		{"client", no_argument, &client_mode, 1},
+		{"isblk", no_argument, &isblk, 1},
 		{NULL, 0, 0, 0},
 	};
 	int opt, idx;
@@ -159,6 +163,52 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_VDPA_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
 	int ret;
@@ -192,6 +242,15 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (isblk) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..e7c0f22
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..137bbc2
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 08/16] usertools: add support for virtio blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 13:53       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 09/16] vdpa/ifc: add set vring state for " Andy Pei
                       ` (7 subsequent siblings)
  15 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to devbind.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 09/16] vdpa/ifc: add set vring state for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

The set_vring_state op is mandatory, so add a set_vring_state callback for
the blk device. For now the callback is a stub: it ignores its arguments
and returns success.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a23dc2d..28191e4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1368,6 +1368,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1385,7 +1395,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 09/16] vdpa/ifc: add set vring state for " Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Log the virtio blk device config space information
at vDPA launch, before QEMU connects.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 28191e4..045623b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1431,6 +1431,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1497,6 +1500,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
                       ` (4 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Previously, max_queues was set to IFCVF_MAX_QUEUES.
Now, for a blk device, max_queues is the minimum of IFCVF_MAX_QUEUES and
the num_queues value reported by the hardware.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 045623b..e8e7d61 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1525,6 +1525,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
                       ` (3 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e8e7d61..c02ae4d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -608,6 +610,107 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -634,10 +737,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -958,6 +1067,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device Andy Pei
                       ` (2 subsequent siblings)
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c02ae4d..f54beaf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1604,11 +1604,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO Andy Pei
  2022-04-21  8:33     ` [PATCH v6 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. For the blk device, last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index e417c50..d923266 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d923266..d89cb73 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -118,7 +118,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f54beaf..578bf6c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -351,23 +351,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -751,7 +760,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v6 16/16] vhost: make sure each queue callfd is configured
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  15 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

While the vhost data path is being set up, QEMU first creates one call fd
and later creates another one.
The final call fd is the one used to relay notifications.
In the original code, dev_conf runs after the kick fd is set and
configures the device with the first call fd. Even though the actual call
fd is set afterwards, the data path does not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 3780804..e6f4113 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3216,12 +3216,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
@ 2022-04-25 12:42       ` Xia, Chenbo
  2022-04-26  8:55         ` Pei, Andy
  2022-04-25 13:04       ` David Marchand
  1 sibling, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-25 12:42 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Andy,

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 03/16] vhost: add vhost msg support
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 69
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 ++++++++++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..3780804 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>  	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>  	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) get_config() return error!\n",
> +					 dev->ifname);
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",

Supported 

> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {

I think you should do a sanity check on payload.cfg.size and make sure it's smaller
than VHOST_USER_MAX_CONFIG_SIZE.

The same check is needed for offset.

> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) invalid set config msg size: %"PRId32" != %d\n",
> +			dev->ifname, ctx->msg.size,

Assuming you will change the log too: payload.cfg.size is uint32_t,
so PRId32 -> PRIu32

> +			(int)sizeof(struct vhost_user_config));

So this can be %u

> +		goto OUT;
> +	}
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +		if (ret)
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) set_config() return error!\n",
> +					 dev->ifname);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supportted!\n",

Supported

> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_OK;
> +
> +OUT:

Lower case looks better

> +	return RTE_VHOST_MSG_RESULT_ERR;
> +}

Almost all handlers need to check the expected fd num (0 in this case), so the new
handlers above should also do that. Please refer to validate_msg_fds in other handlers.

BTW, you can wait for review for other patches and send new versions later.

Thanks,
Chenbo

> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
>  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
>  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
>  	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
>  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
>  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index c946cc2..97cfb2f 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -125,6 +127,16 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -148,6 +160,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-04-25 12:58       ` Xia, Chenbo
  2022-04-26  9:56         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-25 12:58 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Andy,

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
> 
> For the block device type, we have to relay
> the commands on all queues.

It's a bit short... although I can understand it, please add some background
on the current implementation so that others can easily understand.

> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++------
> -----
>  1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8ee041f..8d104b7 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
>  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>  	irq_set->start = 0;
>  	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change
> notification */
>  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>  		rte_intr_fd_get(internal->pdev->intr_handle);
> 
>  	for (i = 0; i < nr_vring; i++)
>  		internal->intr_fd[i] = -1;
> 
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;

There is a lot of duplicated code here for blk and net. What if we use this condition
to decide whether to create the eventfd:

if (m_rx == true && (is_blk_dev || (i & 1) == 0)) {
	/* create eventfd and save now */
}

Thanks,
Chenbo

>  			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>  		}
>  	}
> 
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* Re: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
  2022-04-25 12:42       ` Xia, Chenbo
@ 2022-04-25 13:04       ` David Marchand
  2022-04-26  8:08         ` Pei, Andy
  1 sibling, 1 reply; 191+ messages in thread
From: David Marchand @ 2022-04-25 13:04 UTC (permalink / raw)
  To: Andy Pei; +Cc: dev, Xia, Chenbo, Maxime Coquelin, gang.cao, Liu, Changpeng

On Thu, Apr 21, 2022 at 11:20 AM Andy Pei <andy.pei@intel.com> wrote:
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..3780804 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>         [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>         [VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>         [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +       [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +       [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>         [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>         [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>         [VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
>
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct virtio_net *dev = *pdev;
> +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +       int ret = 0;

You must check if there is any fd attached to this message.


> +
> +       if (vdpa_dev->ops->get_config) {
> +               ret = vdpa_dev->ops->get_config(dev->vid,
> +                                          ctx->msg.payload.cfg.region,
> +                                          ctx->msg.payload.cfg.size);
> +               if (ret != 0) {
> +                       ctx->msg.size = 0;
> +                       VHOST_LOG_CONFIG(ERR,
> +                                        "(%s) get_config() return error!\n",
> +                                        dev->ifname);
> +               }
> +       } else {
> +               VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
> +                                dev->ifname);
> +       }
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct virtio_net *dev = *pdev;
> +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +       int ret = 0;

Idem.


> +
> +       if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> +               VHOST_LOG_CONFIG(ERR,
> +                       "(%s) invalid set config msg size: %"PRId32" != %d\n",
> +                       dev->ifname, ctx->msg.size,
> +                       (int)sizeof(struct vhost_user_config));
> +               goto OUT;
> +       }


For info, I posted a series to make this kind of check more systematic.
See: https://patchwork.dpdk.org/project/dpdk/patch/20220425125431.26464-2-david.marchand@redhat.com/



--
David Marchand


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
  2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-04-25 13:10       ` Xia, Chenbo
  2022-04-26 10:07         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-25 13:10 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> Add dirty page logging to block device.

Add SW live-migration support including dirty page logging for block device.

> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>  drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++--
> -----
>  3 files changed, 115 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index d10c1fd..e417c50 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -191,7 +191,7 @@
>  	IFCVF_WRITE_REG32(val >> 32, hi);
>  }
> 
> -STATIC int
> +int
>  ifcvf_hw_enable(struct ifcvf_hw *hw)
>  {
>  	struct ifcvf_pci_common_cfg *cfg;
> @@ -240,7 +240,7 @@
>  	return 0;
>  }
> 
> -STATIC void
> +void
>  ifcvf_hw_disable(struct ifcvf_hw *hw)
>  {
>  	u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>  u64
>  ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
> 
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>  #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8d104b7..a23dc2d 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
>  	}
>  }
> 
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
> +{
> +	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
> +	int i, vid;
> +	uint64_t features = 0;
> +	uint64_t log_base = 0, log_size = 0;
> +	uint64_t len;
> +
> +	vid = internal->vid;
> +
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
> +	ifcvf_hw_disable(hw);
> +
> +	for (i = 0; i < hw->nr_vring; i++)
> +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +				hw->vring[i].last_used_idx);
> +
> +	if (internal->sw_lm)
> +		return;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +	if (RTE_VHOST_NEED_LOG(features)) {
> +		ifcvf_disable_logging(hw);
> +		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> +		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> +				log_base, IFCVF_LOG_BASE, log_size);
> +		/*
> +		 * IFCVF marks dirty memory pages for only packet buffer,
> +		 * SW helps to mark the used ring as dirty after device stops.
> +		 */
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +			rte_vhost_log_used_vring(vid, i, 0, len);
> +		}
> +	}
> +}

Can we consider combining vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop into one function and
checking the device type internally to do different things? As far as I can see, most of
the logic is the same.

> +
>  #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
>  		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
>  static int
> @@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
>  		}
>  		hw->vring[i].avail = gpa;
> 
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> -			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> -			if (gpa == 0) {
> -				DRV_LOG(ERR, "Fail to get GPA for used ring.");
> -				return -1;
> +		if (internal->device_type == IFCVF_NET) {
> +			/* Direct I/O for Tx queue, relay for Rx queue */
> +			if (i & 1) {
> +				gpa = hva_to_gpa(vid,
> (uint64_t)(uintptr_t)vq.used);
> +				if (gpa == 0) {
> +					DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> +					return -1;
> +				}
> +				hw->vring[i].used = gpa;
> +			} else {
> +				hw->vring[i].used = m_vring_iova +
> +					(char *)internal->m_vring[i].used -
> +					(char *)internal->m_vring[i].desc;
>  			}
> -			hw->vring[i].used = gpa;
> -		} else {
> +		} else if (internal->device_type == IFCVF_BLK) {
> +			/* BLK: relay every queue */
>  			hw->vring[i].used = m_vring_iova +
>  				(char *)internal->m_vring[i].used -
>  				(char *)internal->m_vring[i].desc;
> @@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
>  	}
>  	hw->nr_vring = nr_vring;
> 
> -	return ifcvf_start_hw(&internal->hw);
> +	if (internal->device_type == IFCVF_NET)
> +		return ifcvf_start_hw(&internal->hw);
> +	else if (internal->device_type == IFCVF_BLK)
> +		return ifcvf_hw_enable(&internal->hw);
> 
>  error:
>  	for (i = 0; i < nr_vring; i++)
> @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
> 
>  	for (i = 0; i < hw->nr_vring; i++) {
>  		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (internal->device_type == IFCVF_NET) {
> +			if ((i & 1) == 0)
> +				update_used_ring(internal, i);
> +		} else if (internal->device_type == IFCVF_BLK) {
>  			update_used_ring(internal, i);
> +		}
> 
>  		rte_vhost_get_vhost_vring(vid, i, &vq);
>  		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
>  		}
>  	}
> 
> -	for (qid = 0; qid < q_num; qid += 2) {
> -		ev.events = EPOLLIN | EPOLLPRI;
> -		/* leave a flag to mark it's for interrupt */
> -		ev.data.u64 = 1 | qid << 1 |
> -			(uint64_t)internal->intr_fd[qid] << 32;
> -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -				< 0) {
> -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -			return NULL;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (qid = 0; qid < q_num; qid += 2) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (qid = 0; qid < q_num; qid += 1) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);

It seems we can also reduce the duplicated code in the above case. And for other checks,
if we can use only one combined condition to check, I prefer to just use one.

Thanks,
Chenbo

>  		}
> -		update_used_ring(internal, qid);
>  	}
> 
>  	/* start relay with a first kick */
> @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
> 
>  	/* stop the direct IO data path */
>  	unset_notify_relay(internal);
> -	vdpa_ifcvf_stop(internal);
> +	if (internal->device_type == IFCVF_NET)
> +		vdpa_ifcvf_stop(internal);
> +	else if (internal->device_type == IFCVF_BLK)
> +		vdpa_ifcvf_blk_pause(internal);
>  	vdpa_disable_vfio_intr(internal);
> 
>  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
  2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-04-25 13:38       ` Xia, Chenbo
  2022-04-27  4:11         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-25 13:38 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Andy,

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
> 
> Add virtio blk device support to vDPA example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  examples/vdpa/main.c             |  61 +++++++++++++-
>  examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
>  examples/vdpa/vhost_user.h       | 169
> +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 301 insertions(+), 1 deletion(-)
>  create mode 100644 examples/vdpa/vdpa_blk_compact.h
>  create mode 100644 examples/vdpa/vhost_user.h
> 
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..1c809ab 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>  #include <cmdline_parse_string.h>
>  #include <cmdline_parse_num.h>
>  #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
> 
>  #define MAX_PATH_LEN 128
>  #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -41,6 +42,7 @@ struct vdpa_port {
>  static int devcnt;
>  static int interactive;
>  static int client_mode;
> +static int isblk;
> 
>  /* display usage */
>  static void
> @@ -49,7 +51,8 @@ struct vdpa_port {
>  	printf("Usage: %s [EAL options] -- "
>  				 "	--interactive|-i: run in interactive
> mode.\n"
>  				 "	--iface <path>: specify the path prefix of
> the socket files, e.g. /tmp/vhost-user-.\n"
> -				 "	--client: register a vhost-user socket as
> client mode.\n",
> +				 "	--client: register a vhost-user socket as
> client mode.\n"
> +				 "	--isblk: device is a block device, e.g.
> virtio_blk device.\n",
>  				 prgname);
>  }
> 
> @@ -61,6 +64,7 @@ struct vdpa_port {
>  		{"iface", required_argument, NULL, 0},
>  		{"interactive", no_argument, &interactive, 1},
>  		{"client", no_argument, &client_mode, 1},
> +		{"isblk", no_argument, &isblk, 1},

I think a new API for get_device_type will be better than asking user to specify the
device type.

>  		{NULL, 0, 0, 0},
>  	};
>  	int opt, idx;
> @@ -159,6 +163,52 @@ struct vdpa_port {
>  };
> 
>  static int
> +vdpa_blk_device_set_features_and_protocol(const char *path)
> +{
> +	uint64_t protocol_features = 0;
> +	int ret;
> +
> +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_disable_features(path,
> +		VHOST_VDPA_BLK_DISABLED_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_disable_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_get_protocol_features(path,
> &protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_get_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> +
> +	ret = rte_vhost_driver_set_protocol_features(path,
> protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +out:
> +	return ret;
> +}
> +
> +static int
>  start_vdpa(struct vdpa_port *vport)
>  {
>  	int ret;
> @@ -192,6 +242,15 @@ struct vdpa_port {
>  			"attach vdpa device failed: %s\n",
>  			socket_path);
> 
> +	if (isblk) {
> +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
> +		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				"set vhost blk driver features and protocol
> features failed: %s\n",
> +				socket_path);
> +	}
> +
>  	if (rte_vhost_driver_start(socket_path) < 0)
>  		rte_exit(EXIT_FAILURE,
>  			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/vdpa_blk_compact.h
> b/examples/vdpa/vdpa_blk_compact.h

Please remove unused definition in this header. I didn't check all, but it
seems at least VHOST_BLK_PROTOCOL_FEATURES is not used.

> new file mode 100644
> index 0000000..e7c0f22
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,72 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size
> */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments
> */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> */
> +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> config */
> +
> +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
> +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#endif
> +
> +#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> +
> +#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) |
> \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> +
> +#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
> \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  |
> \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> +
> +/* Vhost-blk support protocol features */
> +#define VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
> new file mode 100644
> index 0000000..137bbc2
> --- /dev/null
> +++ b/examples/vdpa/vhost_user.h

I don't understand, why introduce this header? It seems never used.

Thanks,
Chenbo

> @@ -0,0 +1,169 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "rte_vhost.h"
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
> +
> +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL <<
> VHOST_USER_PROTOCOL_F_MQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
> +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SET_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_POSTCOPY_LISTEN = 29,
> +	VHOST_USER_POSTCOPY_END = 30,
> +	VHOST_USER_GET_INFLIGHT_FD = 31,
> +	VHOST_USER_SET_INFLIGHT_FD = 32,
> +	VHOST_USER_MAX = 33
> +} VhostUserRequest;
> +
> +typedef enum VhostUserSlaveRequest {
> +	VHOST_USER_SLAVE_NONE = 0,
> +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> +	VHOST_USER_SLAVE_MAX
> +} VhostUserSlaveRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +/* Comply with Cryptodev-Linux */
> +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> +
> +/* Same structure as vhost-user backend session info */
> +typedef struct VhostUserCryptoSessionParam {
> +	int64_t session_id;
> +	uint32_t op_code;
> +	uint32_t cipher_algo;
> +	uint32_t cipher_key_len;
> +	uint32_t hash_algo;
> +	uint32_t digest_len;
> +	uint32_t auth_key_len;
> +	uint32_t aad_len;
> +	uint8_t op_type;
> +	uint8_t dir;
> +	uint8_t hash_mode;
> +	uint8_t chaining_dir;
> +	uint8_t *ciphe_key;
> +	uint8_t *auth_key;
> +	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> +	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> +} VhostUserCryptoSessionParam;
> +
> +typedef struct VhostUserVringArea {
> +	uint64_t u64;
> +	uint64_t size;
> +	uint64_t offset;
> +} VhostUserVringArea;
> +
> +typedef struct VhostUserInflight {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +	uint16_t num_queues;
> +	uint16_t queue_size;
> +} VhostUserInflight;
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
> +typedef struct VhostUserMsg {
> +	union {
> +		uint32_t master; /* a VhostUserRequest value */
> +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> +	} request;
> +
> +#define VHOST_USER_VERSION_MASK     0x3
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   0xff
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +		VhostUserLog    log;
> +		struct vhost_iotlb_msg iotlb;
> +		VhostUserCryptoSessionParam crypto_session;
> +		VhostUserVringArea area;
> +		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +	int fd_num;
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    0x1
> +#endif
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 08/16] usertools: add support for virtio blk device
  2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-04-25 13:53       ` Xia, Chenbo
  2022-04-26  4:13         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-25 13:53 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, david.marchand

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 08/16] usertools: add support for virtio blk device
> 
> Add virtio blk device support to devbind.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  usertools/dpdk-devbind.py | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> index ace4627..cbe336f 100755
> --- a/usertools/dpdk-devbind.py
> +++ b/usertools/dpdk-devbind.py
> @@ -14,6 +14,8 @@
>  from os.path import join as path_join

Whether or not it is supported in this script, it should not be named 'XXX_class'.
It may not even need to be defined as a new class. Maybe put it under misc devices for now?
I'd like to hear others' opinions.

And Stephen commented about using driverctl, please reply to that.

Thanks,
Chenbo

> 
>  # The PCI base class for all devices
> +virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
> +                    'SVendor': '8086', 'SDevice': '0002'}
>  network_class = {'Class': '02', 'Vendor': None, 'Device': None,
>                   'SVendor': None, 'SDevice': None}
>  acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
> @@ -72,6 +74,7 @@
>  cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
>                   'SVendor': None, 'SDevice': None}
> 
> +virtio_blk_devices = [virtio_blk_class]
>  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
>  baseband_devices = [acceleration_class]
>  crypto_devices = [encryption_class, intel_processor_class]
> @@ -587,6 +590,9 @@ def show_status():
>      Displays to the user what devices are bound to the igb_uio driver,
> the
>      kernel driver or to no driver'''
> 
> +    if status_dev in ["virtio_blk", "all"]:
> +        show_device_status(virtio_blk_devices, "virtio_blk")
> +
>      if status_dev in ["net", "all"]:
>          show_device_status(network_devices, "Network", if_field=True)
> 
> @@ -746,6 +752,7 @@ def do_arg_actions():
>          if b_flag is not None:
>              clear_data()
>              # refresh if we have changed anything
> +            get_device_details(virtio_blk_devices)
>              get_device_details(network_devices)
>              get_device_details(baseband_devices)
>              get_device_details(crypto_devices)
> @@ -769,6 +776,7 @@ def main():
>      parse_args()
>      check_modules()
>      clear_data()
> +    get_device_details(virtio_blk_devices)
>      get_device_details(network_devices)
>      get_device_details(baseband_devices)
>      get_device_details(crypto_devices)
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 08/16] usertools: add support for virtio blk device
  2022-04-25 13:53       ` Xia, Chenbo
@ 2022-04-26  4:13         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-26  4:13 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, david.marchand

Hi Chenbo,

Thanks for your reply.
I will send out a new version, and put blk device into misc devices.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:53 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; david.marchand@redhat.com
> Subject: RE: [PATCH v6 08/16] usertools: add support for virtio blk device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 08/16] usertools: add support for virtio blk device
> >
> > Add virtio blk device support to devbind.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  usertools/dpdk-devbind.py | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> > index ace4627..cbe336f 100755
> > --- a/usertools/dpdk-devbind.py
> > +++ b/usertools/dpdk-devbind.py
> > @@ -14,6 +14,8 @@
> >  from os.path import join as path_join
> 
> Supporting it in this script or not, it should not be named as 'XXX_class'.
> And even may not be defined as new class. Maybe go to misc devices for
> now?
> I'd like to hear others' opinion.
> 
> And Stephen commented about using driverctl, please reply to that.
> 
> Thanks,
> Chenbo
> 
> >
> >  # The PCI base class for all devices
> > +virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
> > +                    'SVendor': '8086', 'SDevice': '0002'}
> >  network_class = {'Class': '02', 'Vendor': None, 'Device': None,
> >                   'SVendor': None, 'SDevice': None}
> > acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
> > @@ -72,6 +74,7 @@  cn9k_ree = {'Class': '08', 'Vendor': '177d',
> > 'Device': 'a0f4',
> >                   'SVendor': None, 'SDevice': None}
> >
> > +virtio_blk_devices = [virtio_blk_class]
> >  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
> > baseband_devices = [acceleration_class]  crypto_devices =
> > [encryption_class, intel_processor_class] @@ -587,6 +590,9 @@ def
> > show_status():
> >      Displays to the user what devices are bound to the igb_uio
> > driver, the
> >      kernel driver or to no driver'''
> >
> > +    if status_dev in ["virtio_blk", "all"]:
> > +        show_device_status(virtio_blk_devices, "virtio_blk")
> > +
> >      if status_dev in ["net", "all"]:
> >          show_device_status(network_devices, "Network", if_field=True)
> >
> > @@ -746,6 +752,7 @@ def do_arg_actions():
> >          if b_flag is not None:
> >              clear_data()
> >              # refresh if we have changed anything
> > +            get_device_details(virtio_blk_devices)
> >              get_device_details(network_devices)
> >              get_device_details(baseband_devices)
> >              get_device_details(crypto_devices)
> > @@ -769,6 +776,7 @@ def main():
> >      parse_args()
> >      check_modules()
> >      clear_data()
> > +    get_device_details(virtio_blk_devices)
> >      get_device_details(network_devices)
> >      get_device_details(baseband_devices)
> >      get_device_details(crypto_devices)
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-25 13:04       ` David Marchand
@ 2022-04-26  8:08         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-26  8:08 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Xia, Chenbo, Maxime Coquelin, Cao, Gang, Liu, Changpeng

Hi David,

Thanks for your reply.
I will send out a version to address that.

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Monday, April 25, 2022 9:05 PM
> To: Pei, Andy <andy.pei@intel.com>
> Cc: dev <dev@dpdk.org>; Xia, Chenbo <chenbo.xia@intel.com>; Maxime
> Coquelin <maxime.coquelin@redhat.com>; Cao, Gang
> <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: Re: [PATCH v6 03/16] vhost: add vhost msg support
> 
> On Thu, Apr 21, 2022 at 11:20 AM Andy Pei <andy.pei@intel.com> wrote:
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..3780804 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >         [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >         [VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >         [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +       [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +       [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >         [VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >         [VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >         [VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE",
> > @@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net
> > *dev,  }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +                       struct vhu_msg_context *ctx,
> > +                       int main_fd __rte_unused) {
> > +       struct virtio_net *dev = *pdev;
> > +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +       int ret = 0;
> 
> You must check if there is any fd attached to this message.
> 
> 
> > +
> > +       if (vdpa_dev->ops->get_config) {
> > +               ret = vdpa_dev->ops->get_config(dev->vid,
> > +                                          ctx->msg.payload.cfg.region,
> > +                                          ctx->msg.payload.cfg.size);
> > +               if (ret != 0) {
> > +                       ctx->msg.size = 0;
> > +                       VHOST_LOG_CONFIG(ERR,
> > +                                        "(%s) get_config() return error!\n",
> > +                                        dev->ifname);
> > +               }
> > +       } else {
> > +               VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
> > +                                dev->ifname);
> > +       }
> > +
> > +       return RTE_VHOST_MSG_RESULT_REPLY; }
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +                       struct vhu_msg_context *ctx,
> > +                       int main_fd __rte_unused) {
> > +       struct virtio_net *dev = *pdev;
> > +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +       int ret = 0;
> 
> Idem.
> 
> 
> > +
> > +       if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> > +               VHOST_LOG_CONFIG(ERR,
> > +                       "(%s) invalid set config msg size: %"PRId32" != %d\n",
> > +                       dev->ifname, ctx->msg.size,
> > +                       (int)sizeof(struct vhost_user_config));
> > +               goto OUT;
> > +       }
> 
> 
> For info, I posted a series to make this kind of check more systematic.
> See:
> https://patchwork.dpdk.org/project/dpdk/patch/20220425125431.26464-2-
> david.marchand@redhat.com/
> 
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-25 12:42       ` Xia, Chenbo
@ 2022-04-26  8:55         ` Pei, Andy
  2022-04-26  9:17           ` Xia, Chenbo
  0 siblings, 1 reply; 191+ messages in thread
From: Pei, Andy @ 2022-04-26  8:55 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
My reply is inline.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 8:42 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 03/16] vhost: add vhost msg support
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 69
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 13 ++++++++++
> >  2 files changed, 82 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..3780804 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >  	[VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >  	[VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >  	[VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >  	[VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE", @@
> > -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (vdpa_dev->ops->get_config) {
> > +		ret = vdpa_dev->ops->get_config(dev->vid,
> > +					   ctx->msg.payload.cfg.region,
> > +					   ctx->msg.payload.cfg.size);
> > +		if (ret != 0) {
> > +			ctx->msg.size = 0;
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) get_config() return error!\n",
> > +					 dev->ifname);
> > +		}
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> supportted!\n",
> 
> Supported
> 
I will send out a new version to fix this.
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> 
> I think you should do sanity check on payload.cfg.size and make sure it's
> smaller than VHOST_USER_MAX_CONFIG_SIZE
> 
> and same check for offset
> 
I think payload.cfg.size can be smaller than or equal to VHOST_USER_MAX_CONFIG_SIZE.
payload.cfg.offset can be smaller than or equal to VHOST_USER_MAX_CONFIG_SIZE as well.

> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) invalid set config msg size: %"PRId32" != %d\n",
> > +			dev->ifname, ctx->msg.size,
> 
> Based on you will change the log too, payload.cfg.size is uint32_t, so PRId32 ->
> PRIu32
> 
> > +			(int)sizeof(struct vhost_user_config));
> 
> So this can be %u
> 
Sure.
> > +		goto OUT;
> > +	}
> > +
> > +	if (vdpa_dev->ops->set_config) {
> > +		ret = vdpa_dev->ops->set_config(dev->vid,
> > +			ctx->msg.payload.cfg.region,
> > +			ctx->msg.payload.cfg.offset,
> > +			ctx->msg.payload.cfg.size,
> > +			ctx->msg.payload.cfg.flags);
> > +		if (ret)
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) set_config() return error!\n",
> > +					 dev->ifname);
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> supportted!\n",
> 
> Supported
> 
I will send out a new version to fix this.
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_OK;
> > +
> > +OUT:
> 
> Lower case looks better
> 
OK. I will send out a new version to fix this.
> > +	return RTE_VHOST_MSG_RESULT_ERR;
> > +}
> 
> Almost all handlers need check on expected fd num (this case is 0), so the
> above new handlers should also do that. Please refer to validate_msg_fds in
> other handlers.
> 
> BTW, you can wait for review for other patches and send new versions later.
> 
I will send out a new patch after "vhost: validate fds attached to messages" from David Marchand is merged.
> Thanks,
> Chenbo
> 
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >  			struct vhu_msg_context *ctx,
> >  			int main_fd __rte_unused)
> > @@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> > virtio_net **pdev,
> >  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> >  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> >  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> >  	[VHOST_USER_POSTCOPY_ADVISE] =
> vhost_user_set_postcopy_advise,
> >  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> >  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> git
> > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > c946cc2..97cfb2f 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >  	VHOST_USER_NET_SET_MTU = 20,
> >  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >  	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_GET_CONFIG = 24,
> > +	VHOST_USER_SET_CONFIG = 25,
> >  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >  	VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -125,6 +127,16 @@
> >  	uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> >  typedef struct VhostUserMsg {
> >  	union {
> >  		uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> +160,7 @@
> >  		VhostUserCryptoSessionParam crypto_session;
> >  		VhostUserVringArea area;
> >  		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> >  	} payload;
> >  	/* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-26  8:55         ` Pei, Andy
@ 2022-04-26  9:17           ` Xia, Chenbo
  2022-04-27  4:12             ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-04-26  9:17 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, April 26, 2022 4:56 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> 
> HI Chenbo,
> 
> Thanks for your reply.
> My reply is inline.
> 
> > -----Original Message-----
> > From: Xia, Chenbo <chenbo.xia@intel.com>
> > Sent: Monday, April 25, 2022 8:42 PM
> > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> >
> > Hi Andy,
> >
> > > -----Original Message-----
> > > From: Pei, Andy <andy.pei@intel.com>
> > > Sent: Thursday, April 21, 2022 4:34 PM
> > > To: dev@dpdk.org
> > > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > > <changpeng.liu@intel.com>
> > > Subject: [PATCH v6 03/16] vhost: add vhost msg support
> > >
> > > Add support for VHOST_USER_GET_CONFIG and
> > VHOST_USER_SET_CONFIG.
> > > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > > supported by virtio blk VDPA device.
> > >
> > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > ---
> > >  lib/vhost/vhost_user.c | 69
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  lib/vhost/vhost_user.h | 13 ++++++++++
> > >  2 files changed, 82 insertions(+)
> > >
> > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > > 1d39067..3780804 100644
> > > --- a/lib/vhost/vhost_user.c
> > > +++ b/lib/vhost/vhost_user.c
> > > @@ -80,6 +80,8 @@
> > >  [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> > >  [VHOST_USER_SET_SLAVE_REQ_FD]  =
> > "VHOST_USER_SET_SLAVE_REQ_FD",
> > >  [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > > +[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > > +[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> > >  [VHOST_USER_CRYPTO_CREATE_SESS] =
> > "VHOST_USER_CRYPTO_CREATE_SESS",
> > >  [VHOST_USER_CRYPTO_CLOSE_SESS] =
> > "VHOST_USER_CRYPTO_CLOSE_SESS",
> > >  [VHOST_USER_POSTCOPY_ADVISE]  =
> > "VHOST_USER_POSTCOPY_ADVISE", @@
> > > -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > > }
> > >
> > >  static int
> > > +vhost_user_get_config(struct virtio_net **pdev,
> > > +struct vhu_msg_context *ctx,
> > > +int main_fd __rte_unused)
> > > +{
> > > +struct virtio_net *dev = *pdev;
> > > +struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > > +int ret = 0;
> > > +
> > > +if (vdpa_dev->ops->get_config) {
> > > +ret = vdpa_dev->ops->get_config(dev->vid,
> > > +   ctx->msg.payload.cfg.region,
> > > +   ctx->msg.payload.cfg.size);
> > > +if (ret != 0) {
> > > +ctx->msg.size = 0;
> > > +VHOST_LOG_CONFIG(ERR,
> > > + "(%s) get_config() return error!\n",
> > > + dev->ifname);
> > > +}
> > > +} else {
> > > +VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> > supportted!\n",
> >
> > Supported
> >
> I will send out a new version to fix this.
> > > + dev->ifname);
> > > +}
> > > +
> > > +return RTE_VHOST_MSG_RESULT_REPLY;
> > > +}
> > > +
> > > +static int
> > > +vhost_user_set_config(struct virtio_net **pdev,
> > > +struct vhu_msg_context *ctx,
> > > +int main_fd __rte_unused)
> > > +{
> > > +struct virtio_net *dev = *pdev;
> > > +struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > > +int ret = 0;
> > > +
> > > +if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> >
> > I think you should do sanity check on payload.cfg.size and make sure
> it's
> > smaller than VHOST_USER_MAX_CONFIG_SIZE
> >
> > and same check for offset
> >
> I think payload.cfg.size can be smaller than or equal to
> VHOST_USER_MAX_CONFIG_SIZE.
> payload.cfg.ofset can be smaller than or equal to
> VHOST_USER_MAX_CONFIG_SIZE as well

After double-checking: offset is the config space offset, so it should be checked
in the vdpa driver. The size check in the vhost lib layer should just be <= the max you defined (VHOST_USER_MAX_CONFIG_SIZE).

Thanks,
Chenbo

> 
> > > +VHOST_LOG_CONFIG(ERR,
> > > +"(%s) invalid set config msg size: %"PRId32" != %d\n",
> > > +dev->ifname, ctx->msg.size,
> >
> > Based on you will change the log too, payload.cfg.size is uint32_t, so
> PRId32 ->
> > PRIu32
> >
> > > +(int)sizeof(struct vhost_user_config));
> >
> > So this can be %u
> >
> Sure.
> > > +goto OUT;
> > > +}
> > > +
> > > +if (vdpa_dev->ops->set_config) {
> > > +ret = vdpa_dev->ops->set_config(dev->vid,
> > > +ctx->msg.payload.cfg.region,
> > > +ctx->msg.payload.cfg.offset,
> > > +ctx->msg.payload.cfg.size,
> > > +ctx->msg.payload.cfg.flags);
> > > +if (ret)
> > > +VHOST_LOG_CONFIG(ERR,
> > > + "(%s) set_config() return error!\n",
> > > + dev->ifname);
> > > +} else {
> > > +VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> > supportted!\n",
> >
> > Supported
> >
> I will send out a new version to fix this.
> > > + dev->ifname);
> > > +}
> > > +
> > > +return RTE_VHOST_MSG_RESULT_OK;
> > > +
> > > +OUT:
> >
> > Lower case looks better
> >
> OK. I will send out a new version to fix this.
> > > +return RTE_VHOST_MSG_RESULT_ERR;
> > > +}
> >
> > Almost all handlers need check on expected fd num (this case is 0), so
> the
> > above new handlers should also do that. Please refer to validate_msg_fds
> in
> > other handlers.
> >
> > BTW, you can wait for review for other patches and send new versions
> later.
> >
> I will send out new patch after vhost: validate fds attached to messages
> from David Marchand is merged.
> > Thanks,
> > Chenbo
> >
> > > +
> > > +static int
> > >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> > >  struct vhu_msg_context *ctx,
> > >  int main_fd __rte_unused)
> > > @@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> > > virtio_net **pdev,
> > >  [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> > >  [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> > >  [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > > +[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > > +[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> > >  [VHOST_USER_POSTCOPY_ADVISE] =
> > vhost_user_set_postcopy_advise,
> > >  [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> > >  [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> > git
> > > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > > c946cc2..97cfb2f 100644
> > > --- a/lib/vhost/vhost_user.h
> > > +++ b/lib/vhost/vhost_user.h
> > > @@ -50,6 +50,8 @@
> > >  VHOST_USER_NET_SET_MTU = 20,
> > >  VHOST_USER_SET_SLAVE_REQ_FD = 21,
> > >  VHOST_USER_IOTLB_MSG = 22,
> > > +VHOST_USER_GET_CONFIG = 24,
> > > +VHOST_USER_SET_CONFIG = 25,
> > >  VHOST_USER_CRYPTO_CREATE_SESS = 26,
> > >  VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> > >  VHOST_USER_POSTCOPY_ADVISE = 28,
> > > @@ -125,6 +127,16 @@
> > >  uint16_t queue_size;
> > >  } VhostUserInflight;
> > >
> > > +#define VHOST_USER_MAX_CONFIG_SIZE256
> > > +
> > > +/** Get/set config msg payload */
> > > +struct vhost_user_config {
> > > +uint32_t offset;
> > > +uint32_t size;
> > > +uint32_t flags;
> > > +uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > > +};
> > > +
> > >  typedef struct VhostUserMsg {
> > >  union {
> > >  uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> > +160,7 @@
> > >  VhostUserCryptoSessionParam crypto_session;
> > >  VhostUserVringArea area;
> > >  VhostUserInflight inflight;
> > > +struct vhost_user_config cfg;
> > >  } payload;
> > >  /* Nothing should be added after the payload */  } __rte_packed
> > > VhostUserMsg;
> > > --
> > > 1.8.3.1
> >
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-25 12:58       ` Xia, Chenbo
@ 2022-04-26  9:56         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-26  9:56 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
My reply is inline.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 8:58 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
> >
> > For the block device type, we have to relay the commands on all
> > queues.
> 
> It's a bit short... although I can understand, please add some background on
> current implementation for others to easily understand.
> 
Sure, I will send a new patch set to address this.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 46
> > ++++++++++++++++++++++++++++++++------
> > -----
> >  1 file changed, 35 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8ee041f..8d104b7 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
> >  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> >  	irq_set->start = 0;
> >  	fd_ptr = (int *)&irq_set->data;
> > +	/* The first interrupt is for the configure space change
> > notification */
> >  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
> >  		rte_intr_fd_get(internal->pdev->intr_handle);
> >
> >  	for (i = 0; i < nr_vring; i++)
> >  		internal->intr_fd[i] = -1;
> >
> > -	for (i = 0; i < nr_vring; i++) {
> > -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> > -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > -		if ((i & 1) == 0 && m_rx == true) {
> > -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> > -			if (fd < 0) {
> > -				DRV_LOG(ERR, "can't setup eventfd: %s",
> > -					strerror(errno));
> > -				return -1;
> > +	if (internal->device_type == IFCVF_NET) {
> > +		for (i = 0; i < nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> > +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > +			if ((i & 1) == 0 && m_rx == true) {
> > +				/* For the net we only need to relay rx queue,
> > +				 * which will change the mem of VM.
> > +				 */
> > +				fd = eventfd(0, EFD_NONBLOCK |
> EFD_CLOEXEC);
> > +				if (fd < 0) {
> > +					DRV_LOG(ERR, "can't setup
> eventfd: %s",
> > +						strerror(errno));
> > +					return -1;
> > +				}
> > +				internal->intr_fd[i] = fd;
> > +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> > +			}
> > +		}
> > +	} else if (internal->device_type == IFCVF_BLK) {
> > +		for (i = 0; i < nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> > +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > +			if (m_rx == true) {
> > +				/* For the blk we need to relay all the read
> cmd
> > +				 * of each queue
> > +				 */
> > +				fd = eventfd(0, EFD_NONBLOCK |
> EFD_CLOEXEC);
> > +				if (fd < 0) {
> > +					DRV_LOG(ERR, "can't setup
> eventfd: %s",
> > +						strerror(errno));
> > +					return -1;
> > +				}
> > +				internal->intr_fd[i] = fd;
> > +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> 
> Many duplicated code here for blk and net. What if we use this condition to
> know creating eventfd or not:
> 
> if (m_rx == true && (is_blk_dev || (i & 1) == 0)) {
> 	/* create eventfd and save now */
> }
> 
Sure, I will send a new patch set to address this.
> Thanks,
> Chenbo
> 
> >  			}
> > -			internal->intr_fd[i] = fd;
> > -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> >  		}
> >  	}
> >
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
  2022-04-25 13:10       ` Xia, Chenbo
@ 2022-04-26 10:07         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-26 10:07 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
My reply is inline.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:10 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> >
> > Add SW live-migration support to block device.
> > Add dirty page logging to block device.
> 
> Add SW live-migration support including dirty page logging for block device.
> 
Sure, I will remove "Add dirty page logging to block device." in the next version.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.c |   4 +-
> >  drivers/vdpa/ifc/base/ifcvf.h |   6 ++
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 128
> > +++++++++++++++++++++++++++++++++++--
> > -----
> >  3 files changed, 115 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.c
> > b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..e417c50 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.c
> > +++ b/drivers/vdpa/ifc/base/ifcvf.c
> > @@ -191,7 +191,7 @@
> >  	IFCVF_WRITE_REG32(val >> 32, hi);
> >  }
> >
> > -STATIC int
> > +int
> >  ifcvf_hw_enable(struct ifcvf_hw *hw)
> >  {
> >  	struct ifcvf_pci_common_cfg *cfg;
> > @@ -240,7 +240,7 @@
> >  	return 0;
> >  }
> >
> > -STATIC void
> > +void
> >  ifcvf_hw_disable(struct ifcvf_hw *hw)  {
> >  	u32 i;
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -179,4 +179,10 @@ struct ifcvf_hw {
> >  u64
> >  ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
> >
> > +int
> > +ifcvf_hw_enable(struct ifcvf_hw *hw);
> > +
> > +void
> > +ifcvf_hw_disable(struct ifcvf_hw *hw);
> > +
> >  #endif /* _IFCVF_H_ */
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8d104b7..a23dc2d 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
> >  	}
> >  }
> >
> > +static void
> > +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) {
> > +	struct ifcvf_hw *hw = &internal->hw;
> > +	struct rte_vhost_vring vq;
> > +	int i, vid;
> > +	uint64_t features = 0;
> > +	uint64_t log_base = 0, log_size = 0;
> > +	uint64_t len;
> > +
> > +	vid = internal->vid;
> > +
> > +	if (internal->device_type == IFCVF_BLK) {
> > +		for (i = 0; i < hw->nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> > +			while (vq.avail->idx != vq.used->idx) {
> > +				ifcvf_notify_queue(hw, i);
> > +				usleep(10);
> > +			}
> > +			hw->vring[i].last_avail_idx = vq.avail->idx;
> > +			hw->vring[i].last_used_idx = vq.used->idx;
> > +		}
> > +	}
> > +
> > +	ifcvf_hw_disable(hw);
> > +
> > +	for (i = 0; i < hw->nr_vring; i++)
> > +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> > +				hw->vring[i].last_used_idx);
> > +
> > +	if (internal->sw_lm)
> > +		return;
> > +
> > +	rte_vhost_get_negotiated_features(vid, &features);
> > +	if (RTE_VHOST_NEED_LOG(features)) {
> > +		ifcvf_disable_logging(hw);
> > +		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> > +		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> > +				log_base, IFCVF_LOG_BASE, log_size);
> > +		/*
> > +		 * IFCVF marks dirty memory pages for only packet buffer,
> > +		 * SW helps to mark the used ring as dirty after device stops.
> > +		 */
> > +		for (i = 0; i < hw->nr_vring; i++) {
> > +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> > +			rte_vhost_log_used_vring(vid, i, 0, len);
> > +		}
> > +	}
> > +}
> 
> Can we consider combining vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop to
> one function and check device type internally to do different things? Because
> as I see, most logic is the same.
> 
OK, I will address it in the next version.
> > +
> >  #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> >  		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))  static int @@ -
> 659,15
> > +709,22 @@ struct rte_vdpa_dev_info {
> >  		}
> >  		hw->vring[i].avail = gpa;
> >
> > -		/* Direct I/O for Tx queue, relay for Rx queue */
> > -		if (i & 1) {
> > -			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> > -			if (gpa == 0) {
> > -				DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> > -				return -1;
> > +		if (internal->device_type == IFCVF_NET) {
> > +			/* Direct I/O for Tx queue, relay for Rx queue */
> > +			if (i & 1) {
> > +				gpa = hva_to_gpa(vid,
> > (uint64_t)(uintptr_t)vq.used);
> > +				if (gpa == 0) {
> > +					DRV_LOG(ERR, "Fail to get GPA for
> used
> > ring.");
> > +					return -1;
> > +				}
> > +				hw->vring[i].used = gpa;
> > +			} else {
> > +				hw->vring[i].used = m_vring_iova +
> > +					(char *)internal->m_vring[i].used -
> > +					(char *)internal->m_vring[i].desc;
> >  			}
> > -			hw->vring[i].used = gpa;
> > -		} else {
> > +		} else if (internal->device_type == IFCVF_BLK) {
> > +			/* BLK: relay every queue */
> >  			hw->vring[i].used = m_vring_iova +
> >  				(char *)internal->m_vring[i].used -
> >  				(char *)internal->m_vring[i].desc; @@ -686,7
> +743,10 @@ struct
> > rte_vdpa_dev_info {
> >  	}
> >  	hw->nr_vring = nr_vring;
> >
> > -	return ifcvf_start_hw(&internal->hw);
> > +	if (internal->device_type == IFCVF_NET)
> > +		return ifcvf_start_hw(&internal->hw);
> > +	else if (internal->device_type == IFCVF_BLK)
> > +		return ifcvf_hw_enable(&internal->hw);
> >
> >  error:
> >  	for (i = 0; i < nr_vring; i++)
> > @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
> >
> >  	for (i = 0; i < hw->nr_vring; i++) {
> >  		/* synchronize remaining new used entries if any */
> > -		if ((i & 1) == 0)
> > +		if (internal->device_type == IFCVF_NET) {
> > +			if ((i & 1) == 0)
> > +				update_used_ring(internal, i);
> > +		} else if (internal->device_type == IFCVF_BLK) {
> >  			update_used_ring(internal, i);
> > +		}
> >
> >  		rte_vhost_get_vhost_vring(vid, i, &vq);
> >  		len = IFCVF_USED_RING_LEN(vq.size); @@ -773,17 +837,36
> @@ struct
> > rte_vdpa_dev_info {
> >  		}
> >  	}
> >
> > -	for (qid = 0; qid < q_num; qid += 2) {
> > -		ev.events = EPOLLIN | EPOLLPRI;
> > -		/* leave a flag to mark it's for interrupt */
> > -		ev.data.u64 = 1 | qid << 1 |
> > -			(uint64_t)internal->intr_fd[qid] << 32;
> > -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid],
> &ev)
> > -				< 0) {
> > -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> > -			return NULL;
> > +	if (internal->device_type == IFCVF_NET) {
> > +		for (qid = 0; qid < q_num; qid += 2) {
> > +			ev.events = EPOLLIN | EPOLLPRI;
> > +			/* leave a flag to mark it's for interrupt */
> > +			ev.data.u64 = 1 | qid << 1 |
> > +				(uint64_t)internal->intr_fd[qid] << 32;
> > +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> > +				      internal->intr_fd[qid], &ev)
> > +					< 0) {
> > +				DRV_LOG(ERR, "epoll add error: %s",
> > +					strerror(errno));
> > +				return NULL;
> > +			}
> > +			update_used_ring(internal, qid);
> > +		}
> > +	} else if (internal->device_type == IFCVF_BLK) {
> > +		for (qid = 0; qid < q_num; qid += 1) {
> > +			ev.events = EPOLLIN | EPOLLPRI;
> > +			/* leave a flag to mark it's for interrupt */
> > +			ev.data.u64 = 1 | qid << 1 |
> > +				(uint64_t)internal->intr_fd[qid] << 32;
> > +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> > +				      internal->intr_fd[qid], &ev)
> > +					< 0) {
> > +				DRV_LOG(ERR, "epoll add error: %s",
> > +					strerror(errno));
> > +				return NULL;
> > +			}
> > +			update_used_ring(internal, qid);
> 
> It seems we can also reduce duplicate code for above case. And for other
> checks, if we can use only one combined condition to check, I prefer to just
> use one.
> 
OK, I will address it in the next version.
> Thanks,
> Chenbo
> 
> >  		}
> > -		update_used_ring(internal, qid);
> >  	}
> >
> >  	/* start relay with a first kick */
> > @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
> >
> >  	/* stop the direct IO data path */
> >  	unset_notify_relay(internal);
> > -	vdpa_ifcvf_stop(internal);
> > +	if (internal->device_type == IFCVF_NET)
> > +		vdpa_ifcvf_stop(internal);
> > +	else if (internal->device_type == IFCVF_BLK)
> > +		vdpa_ifcvf_blk_pause(internal);
> >  	vdpa_disable_vfio_intr(internal);
> >
> >  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL,
> false);
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
  2022-04-25 13:38       ` Xia, Chenbo
@ 2022-04-27  4:11         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-27  4:11 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
My reply is inline.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:39 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in
> example
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in
> > example
> >
> > Add virtio blk device support to vDPA example.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  examples/vdpa/main.c             |  61 +++++++++++++-
> >  examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
> >  examples/vdpa/vhost_user.h       | 169
> > +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 301 insertions(+), 1 deletion(-)  create mode 100644
> > examples/vdpa/vdpa_blk_compact.h  create mode 100644
> > examples/vdpa/vhost_user.h
> >
> > diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index
> > 5ab0765..1c809ab 100644
> > --- a/examples/vdpa/main.c
> > +++ b/examples/vdpa/main.c
> > @@ -20,6 +20,7 @@
> >  #include <cmdline_parse_string.h>
> >  #include <cmdline_parse_num.h>
> >  #include <cmdline.h>
> > +#include "vdpa_blk_compact.h"
> >
> >  #define MAX_PATH_LEN 128
> >  #define MAX_VDPA_SAMPLE_PORTS 1024
> > @@ -41,6 +42,7 @@ struct vdpa_port {
> >  static int devcnt;
> >  static int interactive;
> >  static int client_mode;
> > +static int isblk;
> >
> >  /* display usage */
> >  static void
> > @@ -49,7 +51,8 @@ struct vdpa_port {
> >  	printf("Usage: %s [EAL options] -- "
> >  				 "	--interactive|-i: run in interactive
> > mode.\n"
> >  				 "	--iface <path>: specify the path prefix
> of
> > the socket files, e.g. /tmp/vhost-user-.\n"
> > -				 "	--client: register a vhost-user socket
> as
> > client mode.\n",
> > +				 "	--client: register a vhost-user socket
> as
> > client mode.\n"
> > +				 "	--isblk: device is a block device, e.g.
> > virtio_blk device.\n",
> >  				 prgname);
> >  }
> >
> > @@ -61,6 +64,7 @@ struct vdpa_port {
> >  		{"iface", required_argument, NULL, 0},
> >  		{"interactive", no_argument, &interactive, 1},
> >  		{"client", no_argument, &client_mode, 1},
> > +		{"isblk", no_argument, &isblk, 1},
> 
> I think a new API for get_device_type will be better than asking user to
> specify the device type.
> 
Good suggestion. I will send out a new version of the patch set and try to do this.
> >  		{NULL, 0, 0, 0},
> >  	};
> >  	int opt, idx;
> > @@ -159,6 +163,52 @@ struct vdpa_port {  };
> >
> >  static int
> > +vdpa_blk_device_set_features_and_protocol(const char *path) {
> > +	uint64_t protocol_features = 0;
> > +	int ret;
> > +
> > +	ret = rte_vhost_driver_set_features(path,
> VHOST_BLK_FEATURES_BASE);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_disable_features(path,
> > +		VHOST_VDPA_BLK_DISABLED_FEATURES);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_disable_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_get_protocol_features(path,
> > &protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_get_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> > +	protocol_features |= (1ULL <<
> VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> > +
> > +	ret = rte_vhost_driver_set_protocol_features(path,
> > protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int
> >  start_vdpa(struct vdpa_port *vport)
> >  {
> >  	int ret;
> > @@ -192,6 +242,15 @@ struct vdpa_port {
> >  			"attach vdpa device failed: %s\n",
> >  			socket_path);
> >
> > +	if (isblk) {
> > +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
> > +		ret =
> vdpa_blk_device_set_features_and_protocol(socket_path);
> > +		if (ret != 0)
> > +			rte_exit(EXIT_FAILURE,
> > +				"set vhost blk driver features and protocol
> > features failed: %s\n",
> > +				socket_path);
> > +	}
> > +
> >  	if (rte_vhost_driver_start(socket_path) < 0)
> >  		rte_exit(EXIT_FAILURE,
> >  			"start vhost driver failed: %s\n", diff --git
> > a/examples/vdpa/vdpa_blk_compact.h
> > b/examples/vdpa/vdpa_blk_compact.h
> 
> Please remove unused definition in this header. I didn't check all, but it
> seems at least VHOST_BLK_PROTOCOL_FEATURES is not used.
> 
Sure. I will remove all unused definitions.
> > new file mode 100644
> > index 0000000..e7c0f22
> > --- /dev/null
> > +++ b/examples/vdpa/vdpa_blk_compact.h
> > @@ -0,0 +1,72 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation  */
> > +
> > +#ifndef _VDPA_BLK_COMPACT_H_
> > +#define _VDPA_BLK_COMPACT_H_
> > +
> > +/**
> > + * @file
> > + *
> > + * Device specific vhost lib
> > + */
> > +
> > +#include <stdbool.h>
> > +
> > +#include <rte_pci.h>
> > +#include <rte_vhost.h>
> > +
> > +/* Feature bits */
> > +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment
> size
> > */
> > +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of
> segments
> > */
> > +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> > +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> > +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> > */
> > +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> > available */
> > +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> > +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> > +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported
> */
> > +
> > +/* Legacy feature bits */
> > +#ifndef VIRTIO_BLK_NO_LEGACY
> > +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> > +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> > */
> > +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> > +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> > config */
> > +
> > +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ #define
> > +VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH #endif
> /* !VIRTIO_BLK_NO_LEGACY
> > +*/
> > +
> > +#ifndef VHOST_USER_F_PROTOCOL_FEATURES #define
> > +VHOST_USER_F_PROTOCOL_FEATURES 30 #endif
> > +
> > +#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
> > +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> > +	(1ULL << VIRTIO_F_VERSION_1) | \
> > +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> > +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> > +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> > +
> > +#define VHOST_BLK_DISABLED_FEATURES ((1ULL <<
> > +VIRTIO_RING_F_EVENT_IDX) |
> > \
> > +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> > +
> > +#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
> > +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL <<
> VIRTIO_BLK_F_SEG_MAX) | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_BLK_SIZE) |
> > \
> > +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL <<
> VIRTIO_BLK_F_BARRIER)  |
> > \
> > +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE)
> > | \
> > +	(1ULL << VIRTIO_BLK_F_MQ))
> > +
> > +/* Not supported features */
> > +#define VHOST_VDPA_BLK_DISABLED_FEATURES
> (VHOST_BLK_DISABLED_FEATURES | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE)
> > | \
> > +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> > +
> > +/* Vhost-blk support protocol features */ #define
> > +VHOST_BLK_PROTOCOL_FEATURES \
> > +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> > +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> > +
> > +#endif /* _VDPA_BLK_COMPACT_H_ */
> > diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
> > new file mode 100644 index 0000000..137bbc2
> > --- /dev/null
> > +++ b/examples/vdpa/vhost_user.h
> 
> I don't understand, why introduce this header? It seems never used.
> 
Sure. I will remove this file.
> Thanks,
> Chenbo
> 
> > @@ -0,0 +1,169 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation
> > + */
> > +
> > +#ifndef _VHOST_NET_USER_H
> > +#define _VHOST_NET_USER_H
> > +
> > +#include <stdint.h>
> > +#include <linux/vhost.h>
> > +
> > +#include "rte_vhost.h"
> > +
> > +/* refer to hw/virtio/vhost-user.c */
> > +
> > +#define VHOST_MEMORY_MAX_NREGIONS 8
> > +
> > +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +#endif
> > +
> > +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL <<
> > VHOST_USER_PROTOCOL_F_MQ) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)
> |\
> > +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> > +			(1ULL <<
> VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> > +			(1ULL <<
> VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> > +			(1ULL <<
> VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> > +
> > +typedef enum VhostUserRequest {
> > +	VHOST_USER_NONE = 0,
> > +	VHOST_USER_GET_FEATURES = 1,
> > +	VHOST_USER_SET_FEATURES = 2,
> > +	VHOST_USER_SET_OWNER = 3,
> > +	VHOST_USER_RESET_OWNER = 4,
> > +	VHOST_USER_SET_MEM_TABLE = 5,
> > +	VHOST_USER_SET_LOG_BASE = 6,
> > +	VHOST_USER_SET_LOG_FD = 7,
> > +	VHOST_USER_SET_VRING_NUM = 8,
> > +	VHOST_USER_SET_VRING_ADDR = 9,
> > +	VHOST_USER_SET_VRING_BASE = 10,
> > +	VHOST_USER_GET_VRING_BASE = 11,
> > +	VHOST_USER_SET_VRING_KICK = 12,
> > +	VHOST_USER_SET_VRING_CALL = 13,
> > +	VHOST_USER_SET_VRING_ERR = 14,
> > +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> > +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> > +	VHOST_USER_GET_QUEUE_NUM = 17,
> > +	VHOST_USER_SET_VRING_ENABLE = 18,
> > +	VHOST_USER_SEND_RARP = 19,
> > +	VHOST_USER_NET_SET_MTU = 20,
> > +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> > +	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> > +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> > +	VHOST_USER_POSTCOPY_ADVISE = 28,
> > +	VHOST_USER_POSTCOPY_LISTEN = 29,
> > +	VHOST_USER_POSTCOPY_END = 30,
> > +	VHOST_USER_GET_INFLIGHT_FD = 31,
> > +	VHOST_USER_SET_INFLIGHT_FD = 32,
> > +	VHOST_USER_MAX = 33
> > +} VhostUserRequest;
> > +
> > +typedef enum VhostUserSlaveRequest {
> > +	VHOST_USER_SLAVE_NONE = 0,
> > +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> > +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> > +	VHOST_USER_SLAVE_MAX
> > +} VhostUserSlaveRequest;
> > +
> > +typedef struct VhostUserMemoryRegion {
> > +	uint64_t guest_phys_addr;
> > +	uint64_t memory_size;
> > +	uint64_t userspace_addr;
> > +	uint64_t mmap_offset;
> > +} VhostUserMemoryRegion;
> > +
> > +typedef struct VhostUserMemory {
> > +	uint32_t nregions;
> > +	uint32_t padding;
> > +	VhostUserMemoryRegion
> regions[VHOST_MEMORY_MAX_NREGIONS];
> > +} VhostUserMemory;
> > +
> > +typedef struct VhostUserLog {
> > +	uint64_t mmap_size;
> > +	uint64_t mmap_offset;
> > +} VhostUserLog;
> > +
> > +/* Comply with Cryptodev-Linux */
> > +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> > +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> > +
> > +/* Same structure as vhost-user backend session info */
> > +typedef struct VhostUserCryptoSessionParam {
> > +	int64_t session_id;
> > +	uint32_t op_code;
> > +	uint32_t cipher_algo;
> > +	uint32_t cipher_key_len;
> > +	uint32_t hash_algo;
> > +	uint32_t digest_len;
> > +	uint32_t auth_key_len;
> > +	uint32_t aad_len;
> > +	uint8_t op_type;
> > +	uint8_t dir;
> > +	uint8_t hash_mode;
> > +	uint8_t chaining_dir;
> > +	uint8_t *ciphe_key;
> > +	uint8_t *auth_key;
> > +	uint8_t
> cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> > +	uint8_t
> auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> > +} VhostUserCryptoSessionParam;
> > +
> > +typedef struct VhostUserVringArea {
> > +	uint64_t u64;
> > +	uint64_t size;
> > +	uint64_t offset;
> > +} VhostUserVringArea;
> > +
> > +typedef struct VhostUserInflight {
> > +	uint64_t mmap_size;
> > +	uint64_t mmap_offset;
> > +	uint16_t num_queues;
> > +	uint16_t queue_size;
> > +} VhostUserInflight;
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> > +typedef struct VhostUserMsg {
> > +	union {
> > +		uint32_t master; /* a VhostUserRequest value */
> > +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> > +	} request;
> > +
> > +#define VHOST_USER_VERSION_MASK     0x3
> > +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> > +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> > +	uint32_t flags;
> > +	uint32_t size; /* the following payload size */
> > +	union {
> > +#define VHOST_USER_VRING_IDX_MASK   0xff
> > +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> > +		uint64_t u64;
> > +		struct vhost_vring_state state;
> > +		struct vhost_vring_addr addr;
> > +		VhostUserMemory memory;
> > +		VhostUserLog    log;
> > +		struct vhost_iotlb_msg iotlb;
> > +		VhostUserCryptoSessionParam crypto_session;
> > +		VhostUserVringArea area;
> > +		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> > +	} payload;
> > +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> > +	int fd_num;
> > +} __attribute((packed)) VhostUserMsg;
> > +
> > +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> > +
> > +/* The version of the protocol we support */
> > +#define VHOST_USER_VERSION    0x1
> > +#endif
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-26  9:17           ` Xia, Chenbo
@ 2022-04-27  4:12             ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-04-27  4:12 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo, 

Thanks for your reply.
My reply is inline.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, April 26, 2022 5:17 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Tuesday, April 26, 2022 4:56 PM
> > To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> >
> > HI Chenbo,
> >
> > Thanks for your reply.
> > My reply is inline.
> >
> > > -----Original Message-----
> > > From: Xia, Chenbo <chenbo.xia@intel.com>
> > > Sent: Monday, April 25, 2022 8:42 PM
> > > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > > Changpeng <changpeng.liu@intel.com>
> > > Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> > >
> > > Hi Andy,
> > >
> > > > -----Original Message-----
> > > > From: Pei, Andy <andy.pei@intel.com>
> > > > Sent: Thursday, April 21, 2022 4:34 PM
> > > > To: dev@dpdk.org
> > > > Cc: Xia, Chenbo <chenbo.xia@intel.com>;
> > > > maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > > > Changpeng <changpeng.liu@intel.com>
> > > > Subject: [PATCH v6 03/16] vhost: add vhost msg support
> > > >
> > > > Add support for VHOST_USER_GET_CONFIG and
> > > VHOST_USER_SET_CONFIG.
> > > > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is
> only
> > > > supported by virtio blk VDPA device.
> > > >
> > > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > > ---
> > > >  lib/vhost/vhost_user.c | 69
> > > > ++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  lib/vhost/vhost_user.h | 13 ++++++++++
> > > >  2 files changed, 82 insertions(+)
> > > >
> > > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > > > 1d39067..3780804 100644
> > > > --- a/lib/vhost/vhost_user.c
> > > > +++ b/lib/vhost/vhost_user.c
> > > > @@ -80,6 +80,8 @@
> > > >  [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> > > > [VHOST_USER_SET_SLAVE_REQ_FD]  =
> > > "VHOST_USER_SET_SLAVE_REQ_FD",
> > > >  [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > > > +[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > > > +[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> > > >  [VHOST_USER_CRYPTO_CREATE_SESS] =
> > > "VHOST_USER_CRYPTO_CREATE_SESS",
> > > >  [VHOST_USER_CRYPTO_CLOSE_SESS] =
> > > "VHOST_USER_CRYPTO_CLOSE_SESS",
> > > >  [VHOST_USER_POSTCOPY_ADVISE]  =
> > > "VHOST_USER_POSTCOPY_ADVISE", @@
> > > > -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net
> > > > *dev, }
> > > >
> > > >  static int
> > > > +vhost_user_get_config(struct virtio_net **pdev, struct
> > > > +vhu_msg_context *ctx, int main_fd __rte_unused) { struct
> > > > +virtio_net *dev = *pdev; struct rte_vdpa_device *vdpa_dev =
> > > > +dev->vdpa_dev; int ret = 0;
> > > > +
> > > > +if (vdpa_dev->ops->get_config) {
> > > > +ret = vdpa_dev->ops->get_config(dev->vid,
> > > > +   ctx->msg.payload.cfg.region,
> > > > +   ctx->msg.payload.cfg.size);
> > > > +if (ret != 0) {
> > > > +ctx->msg.size = 0;
> > > > +VHOST_LOG_CONFIG(ERR,
> > > > + "(%s) get_config() return error!\n",
> > > > + dev->ifname);
> > > > +}
> > > > +} else {
> > > > +VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> > > supportted!\n",
> > >
> > > Supported
> > >
> > I will send out a new version to fix this.
> > > > + dev->ifname);
> > > > +}
> > > > +
> > > > +return RTE_VHOST_MSG_RESULT_REPLY; }
> > > > +
> > > > +static int
> > > > +vhost_user_set_config(struct virtio_net **pdev, struct
> > > > +vhu_msg_context *ctx, int main_fd __rte_unused) { struct
> > > > +virtio_net *dev = *pdev; struct rte_vdpa_device *vdpa_dev =
> > > > +dev->vdpa_dev; int ret = 0;
> > > > +
> > > > +if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> > >
> > > I think you should do sanity check on payload.cfg.size and make sure
> > it's
> > > smaller than VHOST_USER_MAX_CONFIG_SIZE
> > >
> > > and same check for offset
> > >
> > I think payload.cfg.size can be smaller than or equal to
> > VHOST_USER_MAX_CONFIG_SIZE.
> > payload.cfg.ofset can be smaller than or equal to
> > VHOST_USER_MAX_CONFIG_SIZE as well
> 
> After double check: offset is the config space offset, so this should be
> checked in vdpa driver. Size check on vhost lib layer should be just <=
> MAX_you_defined
> 
OK.
> Thanks,
> Chenbo
> 
> >
> > > > +VHOST_LOG_CONFIG(ERR,
> > > > +"(%s) invalid set config msg size: %"PRId32" != %d\n",
> > > > +dev->ifname, ctx->msg.size,
> > >
> > > Based on you will change the log too, payload.cfg.size is uint32_t,
> > > so
> > PRId32 ->
> > > PRIu32
> > >
> > > > +(int)sizeof(struct vhost_user_config));
> > >
> > > So this can be %u
> > >
> > Sure.
> > > > +goto OUT;
> > > > +}
> > > > +
> > > > +if (vdpa_dev->ops->set_config) {
> > > > +ret = vdpa_dev->ops->set_config(dev->vid,
> > > > +ctx->msg.payload.cfg.region,
> > > > +ctx->msg.payload.cfg.offset,
> > > > +ctx->msg.payload.cfg.size,
> > > > +ctx->msg.payload.cfg.flags);
> > > > +if (ret)
> > > > +VHOST_LOG_CONFIG(ERR,
> > > > + "(%s) set_config() return error!\n",
> > > > + dev->ifname);
> > > > +} else {
> > > > +VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> > > supportted!\n",
> > >
> > > Supported
> > >
> > I will send out a new version to fix this.
> > > > + dev->ifname);
> > > > +}
> > > > +
> > > > +return RTE_VHOST_MSG_RESULT_OK;
> > > > +
> > > > +OUT:
> > >
> > > Lower case looks better
> > >
> > OK. I will send out a new version to fix this.
> > > > +return RTE_VHOST_MSG_RESULT_ERR;
> > > > +}
> > >
> > > Almost all handlers need check on expected fd num (this case is 0),
> > > so
> > the
> > > above new handlers should also do that. Please refer to
> > > validate_msg_fds
> > in
> > > other handlers.
> > >
> > > BTW, you can wait for review for other patches and send new versions
> > later.
> > >
> > I will send out new patch after vhost: validate fds attached to
> > messages from David Marchand is merged.
> > > Thanks,
> > > Chenbo
> > >
> > > > +
> > > > +static int
> > > >  vhost_user_iotlb_msg(struct virtio_net **pdev,  struct
> > > > vhu_msg_context *ctx,  int main_fd __rte_unused) @@ -2782,6
> > > > +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> > > > virtio_net **pdev,  [VHOST_USER_NET_SET_MTU] =
> > > > vhost_user_net_set_mtu,  [VHOST_USER_SET_SLAVE_REQ_FD] =
> > > > vhost_user_set_req_fd,  [VHOST_USER_IOTLB_MSG] =
> > > > vhost_user_iotlb_msg,
> > > > +[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > > > +[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> > > >  [VHOST_USER_POSTCOPY_ADVISE] =
> > > vhost_user_set_postcopy_advise,
> > > >  [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> > > > [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> > > git
> > > > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > > > c946cc2..97cfb2f 100644
> > > > --- a/lib/vhost/vhost_user.h
> > > > +++ b/lib/vhost/vhost_user.h
> > > > @@ -50,6 +50,8 @@
> > > >  VHOST_USER_NET_SET_MTU = 20,
> > > >  VHOST_USER_SET_SLAVE_REQ_FD = 21,  VHOST_USER_IOTLB_MSG =
> 22,
> > > > +VHOST_USER_GET_CONFIG = 24,
> > > > +VHOST_USER_SET_CONFIG = 25,
> > > >  VHOST_USER_CRYPTO_CREATE_SESS = 26,
> VHOST_USER_CRYPTO_CLOSE_SESS
> > > > = 27,  VHOST_USER_POSTCOPY_ADVISE = 28, @@ -125,6 +127,16 @@
> > > > uint16_t queue_size;  } VhostUserInflight;
> > > >
> > > > +#define VHOST_USER_MAX_CONFIG_SIZE256
> > > > +
> > > > +/** Get/set config msg payload */ struct vhost_user_config {
> > > > +uint32_t offset; uint32_t size; uint32_t flags; uint8_t
> > > > +region[VHOST_USER_MAX_CONFIG_SIZE];
> > > > +};
> > > > +
> > > >  typedef struct VhostUserMsg {
> > > >  union {
> > > >  uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> > > +160,7 @@
> > > >  VhostUserCryptoSessionParam crypto_session;  VhostUserVringArea
> > > > area;  VhostUserInflight inflight;
> > > > +struct vhost_user_config cfg;
> > > >  } payload;
> > > >  /* Nothing should be added after the payload */  } __rte_packed
> > > > VhostUserMsg;
> > > > --
> > > > 1.8.3.1
> > >
> >
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (4 preceding siblings ...)
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-27  8:29   ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (17 more replies)
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  6 siblings, 18 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc
driver is re-used.
The virtio net and blk devices are distinguished by device id, and each
implements its own specific features and ops.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only when handling the
VHOST_USER_SET_VRING_CALL msg.

v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.
v6:
 Fix some commit log.
 Add vhost socket in log output to make it more user-friendly.
 When driver ops fail, just output some log, do not break message handler.
 Check vhost msg size in msg handler.
v5:
 Fix some coding style issues.
v4:
 Add the "isblk" argument to the vdpa example to specify a block device,
 and fix some issues in the example.
 Make sure code specific to the block device does not affect the net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.

Andy Pei (18):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add vDPA blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: add set vring state for blk device
  vdpa/ifc: add some log at vDPA launch before qemu connect
  vdpa/ifc: read virtio max queues from hardware
  vdpa/ifc: add interrupt and handle for virtio blk
  vdpa/ifc: add is blk flag to ifcvf HW struct
  vdpa/ifc/base: access correct register for blk device
  vdpa/ifc: blk device pause without no inflight IO
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  36 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  20 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 392 +++++++++++++++++++++++++++++++++++++--
 examples/vdpa/main.c             |  57 ++++++
 examples/vdpa/vdpa_blk_compact.h |  65 +++++++
 lib/vhost/rte_vhost.h            |  17 ++
 lib/vhost/socket.c               |  39 ++++
 lib/vhost/vdpa_driver.h          |  11 +-
 lib/vhost/version.map            |   2 +
 lib/vhost/vhost_user.c           |  97 ++++++++++
 lib/vhost/vhost_user.h           |  13 ++
 usertools/dpdk-devbind.py        |   5 +-
 12 files changed, 730 insertions(+), 24 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 02/18] vhost: add vDPA ops for " Andy Pei
                       ` (16 subsequent siblings)
  17 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their proper features and ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 02/18] vhost: add vDPA ops for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
                       ` (15 subsequent siblings)
  17 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vDPA ops.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 03/18] vhost: add vhost msg support
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-04-27  8:29     ` [PATCH v7 02/18] vhost: add vDPA ops for " Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-11 14:24       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (14 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 ++++++++
 2 files changed, 96 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..e925428 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
+			dev->ifname, ctx->msg.size,
+			(int)sizeof(struct vhost_user_config));
+		goto out;
+	}
+
+	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) vhost_user_config size: %"PRIu32", should not be larger than %d\n",
+			dev->ifname, ctx->msg.payload.cfg.size,
+			VHOST_USER_MAX_CONFIG_SIZE);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..97cfb2f 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +160,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
                       ` (13 subsequent siblings)
  17 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-11 14:35       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (12 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

For the block device type, we use one queue to transfer
both read and write requests, so we have to relay commands
on all queues.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..07fc3ca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
 	for (i = 0; i < nr_vring; i++) {
 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
+		if (m_rx == true &&
+			((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
+			/* For the net we only need to relay rx queue,
+			 * which will change the mem of VM.
+			 * For the blk we need to relay all the read cmd
+			 * of each queue
+			 */
 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 			if (fd < 0) {
 				DRV_LOG(ERR, "can't setup eventfd: %s",
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 12:55       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
                       ` (11 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add SW live-migration support to block device.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 07fc3ca..8a260b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
 	uint32_t i;
 	int vid;
 	uint64_t features = 0;
@@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
 	uint64_t len;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -693,8 +712,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -756,7 +779,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 07/18] vhost: add API to get vDPA device type
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:14       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
                       ` (10 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Vhost backends of different devices have different features.
Add an API to get the vDPA device type (currently net device or blk
device), so users can set different features for different kinds of
devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/rte_vhost.h   | 17 +++++++++++++++++
 lib/vhost/socket.c      | 39 +++++++++++++++++++++++++++++++++++++++
 lib/vhost/vdpa_driver.h |  3 +++
 lib/vhost/version.map   |  2 ++
 4 files changed, 61 insertions(+)

diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..c977a24 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE	1024
 
+#define VDPA_DEVICE_TYPE_NET 0
+#define VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..7da90e8 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,45 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+	struct vhost_user_socket *vsocket;
+	struct rte_vdpa_device *vdpa_dev;
+	uint32_t vdpa_type = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+	vsocket = find_vhost_user_socket(path);
+	if (!vsocket) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) socket file is not registered yet.\n",
+				 path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	vdpa_dev = vsocket->vdpa_dev;
+	if (!vdpa_dev) {
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) failed to get vdpa dev type for socket file.\n",
+			path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	*type = vdpa_type;
+
+unlock_exit:
+	pthread_mutex_unlock(&vhost_user.mutex);
+	return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
 	struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index e59a834..9cbd7cd 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
 	/** Set the device configuration space */
 	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
 		      uint32_t size, uint32_t flags);
+
+	/** get device type: net device, blk device... */
+	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 0a66c58..fe4e8de 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -87,6 +87,8 @@ EXPERIMENTAL {
 
 	# added in 22.03
 	rte_vhost_async_dma_configure;
+
+	rte_vhost_driver_get_vdpa_dev_type;
 };
 
 INTERNAL {
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:21       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
                       ` (9 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add get device type ops to ifc driver.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8a260b7..99a6ab0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static int
+ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
+	uint32_t *type)
+{
+	RTE_SET_USED(vdev);
+	*type = VDPA_DEVICE_TYPE_BLK;
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
@@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
 	.get_config = ifcvf_blk_get_config,
+	.get_dev_type = ifcvf_blk_get_device_type,
 };
 
 struct rte_vdpa_dev_info dev_info[] = {
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:34       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 10/18] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to vDPA example.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             | 57 +++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 65 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h

diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..2544141 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -159,8 +160,54 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
+	uint32_t device_type = 0;
 	int ret;
 	char *socket_path = vport->ifname;
 
@@ -192,6 +239,16 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
+	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..136c3f6
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1))
+
+#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX))
+
+#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
+	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 10/18] usertools: add support for virtio blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
                       ` (7 subsequent siblings)
  17 siblings, 0 replies; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add virtio blk device support to devbind.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..7231be4 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -72,6 +72,9 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': None, 'SDevice': None}
+
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -82,7 +85,7 @@
 compress_devices = [cavium_zip]
 regex_devices = [cn9k_ree]
 misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
-                intel_ntb_skx, intel_ntb_icx]
+                intel_ntb_skx, intel_ntb_icx, virtio_blk]
 
 # global dict ethernet devices present. Dictionary indexed by PCI address.
 # Each device within this is itself a dictionary of device properties
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 10/18] usertools: add support for virtio blk device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:44       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

The set_vring_state op is mandatory, so add a set_vring_state callback
for the blk device. Currently it is a stub that does nothing and returns 0.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 99a6ab0..ca49bc3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1290,6 +1290,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1316,7 +1326,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-12 13:53       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Log the virtio blk device config space information
at vDPA launch, before QEMU connects.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ca49bc3..4060a44 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-12 13:55       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
                       ` (4 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Previously, max_queues was always set to IFCVF_MAX_QUEUES.
Now, for the blk device, max_queues is the minimum of IFCVF_MAX_QUEUES
and the num_queues value read from the hardware config space.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4060a44..5a8cf1c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1457,6 +1457,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* override max_queues here to keep the code change minimal */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:52       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
                       ` (3 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a8cf1c..0e94e1f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -558,6 +560,107 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -584,10 +687,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -880,6 +989,9 @@ struct rte_vdpa_dev_info {
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
 	vdpa_ifcvf_stop(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:55       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device Andy Pei
                       ` (2 subsequent siblings)
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..8591ef1 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0e94e1f..4923bc1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1536,11 +1536,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:57       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO Andy Pei
  2022-04-27  8:30     ` [PATCH v7 18/18] vhost: make sure each queue callfd is configured Andy Pei
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

1. For the blk device, last_avail_idx is the lower 16 bits of the
   ring_state register.
2. The address of the ring_state register differs between net and
   blk devices.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..4d5881a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8591ef1..ff11b12 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (15 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:59       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 18/18] vhost: make sure each queue callfd is configured Andy Pei
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4923bc1..def6adf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -314,12 +314,12 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	uint32_t i;
 	int vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
@@ -328,13 +328,22 @@ struct rte_vdpa_dev_info {
 	 */
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* [PATCH v7 18/18] vhost: make sure each queue callfd is configured
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (16 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  3:10       ` Xia, Chenbo
  17 siblings, 1 reply; 191+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu

During the vhost data path building process, QEMU creates
a call fd at first, and creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, after the kick fd is set, dev_conf will
configure the first call fd. Even though the actual call fd is set
later, the data path will not work correctly.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index e925428..82122b6 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3230,12 +3230,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* When VIRTIO_DEV_VDPA_CONFIGURED is already set,
+		 * close the device and configure it again to
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 03/18] vhost: add vhost msg support
  2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
@ 2022-05-11 14:24       ` Xia, Chenbo
  2022-05-12  3:50         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-11 14:24 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 03/18] vhost: add vhost msg support

Title better be: vhost: add vhost msg support for get/set config

> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 83
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 ++++++++
>  2 files changed, 96 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..e925428 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>  	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>  	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) get_config() return error!\n",
> +					 dev->ifname);
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
> +			dev->ifname, ctx->msg.size,
> +			(int)sizeof(struct vhost_user_config));
> +		goto out;
> +	}

Sorry, I was wrong in v6. After double-checking, the size is already checked in read_vhost_message
(although that check is not very precise), so we can remove the check above. (By the way, even if we
kept the check, the condition should be size <= sizeof(config).)

Rest of the patch seems good, thanks.

Chenbo

> +
> +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) vhost_user_config size: %"PRIu32", should not be
> larger than %d\n",
> +			dev->ifname, ctx->msg.payload.cfg.size,
> +			VHOST_USER_MAX_CONFIG_SIZE);
> +		goto out;
> +	}
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +		if (ret)
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) set_config() return error!\n",
> +					 dev->ifname);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_OK;
> +
> +out:
> +	return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
>  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
>  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
>  	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
>  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
>  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index c946cc2..97cfb2f 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -125,6 +127,16 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -148,6 +160,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-05-11 14:35       ` Xia, Chenbo
  2022-05-12  3:49         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-11 14:35 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
> 
> For the block device type, we use one queue to transfer
> both read and write requests, so we have to relay commands

relay interrupt?

I suggest using this commit log:

For the net device type, only the interrupt of the rxq needs to be relayed.
But for block, since all the queues are used for both read and write
requests, the interrupts of all queues need to be relayed.

With this fixed:

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>

> on all queues.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8ee041f..07fc3ca 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
>  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>  	irq_set->start = 0;
>  	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change
> notification */
>  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>  		rte_intr_fd_get(internal->pdev->intr_handle);
> 
> @@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
>  	for (i = 0; i < nr_vring; i++) {
>  		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
>  		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> +		if (m_rx == true &&
> +			((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
> +			/* For the net we only need to relay rx queue,
> +			 * which will change the mem of VM.
> +			 * For the blk we need to relay all the read cmd
> +			 * of each queue
> +			 */
>  			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>  			if (fd < 0) {
>  				DRV_LOG(ERR, "can't setup eventfd: %s",
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
  2022-05-11 14:35       ` Xia, Chenbo
@ 2022-05-12  3:49         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-05-12  3:49 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.
Sure, I will use your commit log suggestion, and I will also change subject to "vdpa/ifc: add vDPA interrupt relay for blk device"

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Wednesday, May 11, 2022 10:36 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
> >
> > For the block device type, we use one queue to transfer both read and
> > write requests, so we have to relay commands
> 
> relay interrupt?
> 
> I suggest using this commit log:
> 
> For the net device type, only interrupt of rxq needed to be relayed.
> But for block, since all the queues are used for both read and write requests.
> Interrupt of all queues needed to be relayed.
> 
> With this fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> > on all queues.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8ee041f..07fc3ca 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
> >  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> >  	irq_set->start = 0;
> >  	fd_ptr = (int *)&irq_set->data;
> > +	/* The first interrupt is for the configure space change
> > notification */
> >  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
> >  		rte_intr_fd_get(internal->pdev->intr_handle);
> >
> > @@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
> >  	for (i = 0; i < nr_vring; i++) {
> >  		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> >  		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > -		if ((i & 1) == 0 && m_rx == true) {
> > +		if (m_rx == true &&
> > +			((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
> > +			/* For the net we only need to relay rx queue,
> > +			 * which will change the mem of VM.
> > +			 * For the blk we need to relay all the read cmd
> > +			 * of each queue
> > +			 */
> >  			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> >  			if (fd < 0) {
> >  				DRV_LOG(ERR, "can't setup eventfd: %s",
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 03/18] vhost: add vhost msg support
  2022-05-11 14:24       ` Xia, Chenbo
@ 2022-05-12  3:50         ` Pei, Andy
  0 siblings, 0 replies; 191+ messages in thread
From: Pei, Andy @ 2022-05-12  3:50 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

Hi Chenbo,

Thanks for your reply.

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Wednesday, May 11, 2022 10:24 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 03/18] vhost: add vhost msg support
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 03/18] vhost: add vhost msg support
> 
> Title better be: vhost: add vhost msg support for get/set config
> 
Sure. 
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 83
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 13 ++++++++
> >  2 files changed, 96 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..e925428 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >  	[VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >  	[VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >  	[VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >  	[VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE", @@
> > -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (vdpa_dev->ops->get_config) {
> > +		ret = vdpa_dev->ops->get_config(dev->vid,
> > +					   ctx->msg.payload.cfg.region,
> > +					   ctx->msg.payload.cfg.size);
> > +		if (ret != 0) {
> > +			ctx->msg.size = 0;
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) get_config() return error!\n",
> > +					 dev->ifname);
> > +		}
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
> > +			dev->ifname, ctx->msg.size,
> > +			(int)sizeof(struct vhost_user_config));
> > +		goto out;
> > +	}
> 
> Sorry, I was wrong in v6, after double check, the size can be checked in
> read_vhost_message (although not that accurate check). So we can remove
> above. (btw, even we check, the logic should Be size <= sizeof(config))
> 
> Rest of the patch seems good, thanks.
> 
> Chenbo
> 
OK. I will remove ctx->msg.size check here.
> > +
> > +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) vhost_user_config size: %"PRIu32", should not
> be
> > larger than %d\n",
> > +			dev->ifname, ctx->msg.payload.cfg.size,
> > +			VHOST_USER_MAX_CONFIG_SIZE);
> > +		goto out;
> > +	}
> > +
> > +	if (vdpa_dev->ops->set_config) {
> > +		ret = vdpa_dev->ops->set_config(dev->vid,
> > +			ctx->msg.payload.cfg.region,
> > +			ctx->msg.payload.cfg.offset,
> > +			ctx->msg.payload.cfg.size,
> > +			ctx->msg.payload.cfg.flags);
> > +		if (ret)
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) set_config() return error!\n",
> > +					 dev->ifname);
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_OK;
> > +
> > +out:
> > +	return RTE_VHOST_MSG_RESULT_ERR;
> > +}
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >  			struct vhu_msg_context *ctx,
> >  			int main_fd __rte_unused)
> > @@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct
> > virtio_net **pdev,
> >  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> >  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> >  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> >  	[VHOST_USER_POSTCOPY_ADVISE] =
> vhost_user_set_postcopy_advise,
> >  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> >  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> git
> > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > c946cc2..97cfb2f 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >  	VHOST_USER_NET_SET_MTU = 20,
> >  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >  	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_GET_CONFIG = 24,
> > +	VHOST_USER_SET_CONFIG = 25,
> >  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >  	VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -125,6 +127,16 @@
> >  	uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> >  typedef struct VhostUserMsg {
> >  	union {
> >  		uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> +160,7 @@
> >  		VhostUserCryptoSessionParam crypto_session;
> >  		VhostUserVringArea area;
> >  		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> >  	} payload;
> >  	/* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
> 


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
  2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-12 12:55       ` Xia, Chenbo
  2022-05-13  3:32         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-12 12:55 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +++++++++++++++++++++++++++++----
>  1 file changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 07fc3ca..8a260b7 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {
>  vdpa_ifcvf_stop(struct ifcvf_internal *internal)
>  {
>  	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
>  	uint32_t i;
>  	int vid;
>  	uint64_t features = 0;
> @@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
>  	uint64_t len;
> 
>  	vid = internal->vid;
> +
> +	/* to make sure no packet is lost for blk device
> +	 * do not stop until last_avail_idx == last_used_idx
> +	 */
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +

This does not seem to match the comment above about avoiding in-flight packets.
But the change in patch 17 seems good. Why not just use the implementation
from patch 17?

Thanks,
Chenbo

>  	ifcvf_stop_hw(hw);
> 
>  	for (i = 0; i < hw->nr_vring; i++)
> @@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
>  		}
>  		hw->vring[i].avail = gpa;
> 
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NET: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((internal->device_type == IFCVF_NET) && (i & 1)) {
>  			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>  			if (gpa == 0) {
>  				DRV_LOG(ERR, "Fail to get GPA for used ring.");
> @@ -693,8 +712,12 @@ struct rte_vdpa_dev_info {
> 
>  	for (i = 0; i < hw->nr_vring; i++) {
>  		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (internal->device_type == IFCVF_NET) {
> +			if ((i & 1) == 0)
> +				update_used_ring(internal, i);
> +		} else if (internal->device_type == IFCVF_BLK) {
>  			update_used_ring(internal, i);
> +		}
> 
>  		rte_vhost_get_vhost_vring(vid, i, &vq);
>  		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -756,7 +779,9 @@ struct rte_vdpa_dev_info {
>  		}
>  	}
> 
> -	for (qid = 0; qid < q_num; qid += 2) {
> +	for (qid = 0; qid < q_num; qid += 1) {
> +		if ((internal->device_type == IFCVF_NET) && (qid & 1))
> +			continue;
>  		ev.events = EPOLLIN | EPOLLPRI;
>  		/* leave a flag to mark it's for interrupt */
>  		ev.data.u64 = 1 | qid << 1 |
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 07/18] vhost: add API to get vDPA device type
  2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-12 13:14       ` Xia, Chenbo
  2022-05-13  4:15         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:14 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 07/18] vhost: add API to get vDPA device type
> 
> Vhost backend of different devices have different features.
> Add a API to get vDPA device type, net device or blk device
> currently, so users can set different features for different
> kinds of devices.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/rte_vhost.h   | 17 +++++++++++++++++
>  lib/vhost/socket.c      | 39 +++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vdpa_driver.h |  3 +++
>  lib/vhost/version.map   |  2 ++
>  4 files changed, 61 insertions(+)
> 
> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
> index c733f85..c977a24 100644
> --- a/lib/vhost/rte_vhost.h
> +++ b/lib/vhost/rte_vhost.h
> @@ -117,6 +117,9 @@
> 
>  #define RTE_MAX_VHOST_DEVICE	1024
> 
> +#define VDPA_DEVICE_TYPE_NET 0
> +#define VDPA_DEVICE_TYPE_BLK 1
> +
>  struct rte_vdpa_device;
> 
>  /**
> @@ -486,6 +489,20 @@ struct rte_vdpa_device *
>  rte_vhost_driver_get_vdpa_device(const char *path);
> 
>  /**
> + * Get the device type of the vdpa device.
> + *
> + * @param path
> + *  The vhost-user socket file path
> + * @param type
> + *  the device type of the vdpa device
> + * @return
> + *  0 on success, -1 on failure
> + */
> +__rte_experimental
> +int
> +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
> +
> +/**
>   * Set the feature bits the vhost-user driver supports.
>   *
>   * @param path
> diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
> index b304339..7da90e8 100644
> --- a/lib/vhost/socket.c
> +++ b/lib/vhost/socket.c
> @@ -619,6 +619,45 @@ struct rte_vdpa_device *
>  }
> 
>  int
> +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
> +{
> +	struct vhost_user_socket *vsocket;
> +	struct rte_vdpa_device *vdpa_dev;
> +	uint32_t vdpa_type = 0;
> +	int ret = 0;
> +
> +	pthread_mutex_lock(&vhost_user.mutex);
> +	vsocket = find_vhost_user_socket(path);
> +	if (!vsocket) {
> +		VHOST_LOG_CONFIG(ERR,
> +				 "(%s) socket file is not registered yet.\n",
> +				 path);
> +		ret = -1;
> +		goto unlock_exit;
> +	}
> +
> +	vdpa_dev = vsocket->vdpa_dev;
> +	if (!vdpa_dev) {
> +		ret = -1;
> +		goto unlock_exit;
> +	}
> +
> +	if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) failed to get vdpa dev type for socket file.\n",
> +			path);
> +		ret = -1;
> +		goto unlock_exit;
> +	}

If a vendor's vdpa driver does not implement this callback, we should return type NET.
Another way would be to make every vdpa driver implement the callback, but since
other vendors only have one type, I prefer the first way.

> +
> +	*type = vdpa_type;
> +
> +unlock_exit:
> +	pthread_mutex_unlock(&vhost_user.mutex);
> +	return ret;
> +}
> +
> +int
>  rte_vhost_driver_disable_features(const char *path, uint64_t features)
>  {
>  	struct vhost_user_socket *vsocket;
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index e59a834..9cbd7cd 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
>  	/** Set the device configuration space */
>  	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
>  		      uint32_t size, uint32_t flags);
> +
> +	/** get device type: net device, blk device... */
> +	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
>  };
> 
>  /**
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index 0a66c58..fe4e8de 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -87,6 +87,8 @@ EXPERIMENTAL {
> 
>  	# added in 22.03
>  	rte_vhost_async_dma_configure;
> +
> +	rte_vhost_driver_get_vdpa_dev_type;

The '# added in 22.07' tag is missing, but when you do v8 this may not be a problem,
as other patches may have already added this tag along with their new APIs.

Also, introducing a new API requires an update to the release notes.
Please refer to http://git.dpdk.org/next/dpdk-next-virtio/commit/?id=868883e899af386abcc298ea80ec7f6a18d8a8e7
as an example.

Thanks,
Chenbo

>  };
> 
>  INTERNAL {
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-12 13:21       ` Xia, Chenbo
  2022-05-12 13:40         ` Xia, Chenbo
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:21 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
> 
> Add get device type ops to ifc driver.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8a260b7..99a6ab0 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
>  	return 0;
>  }
> 
> +static int
> +ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
> +	uint32_t *type)
> +{
> +	RTE_SET_USED(vdev);
> +	*type = VDPA_DEVICE_TYPE_BLK;
> +	return 0;

This is not right. Remember that net and blk both use this driver?
As a result, a net device would also return BLK.

And I suggest validating the patch set with both blk and net...

Besides, ifcvf_blk_get_device_type should be renamed to ifcvf_get_device_type.

Thanks,
Chenbo

> +}
> +
>  static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
>  	.get_queue_num = ifcvf_get_queue_num,
>  	.get_features = ifcvf_get_vdpa_features,
> @@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
>  	.get_notify_area = ifcvf_get_notify_area,
>  	.get_config = ifcvf_blk_get_config,
> +	.get_dev_type = ifcvf_blk_get_device_type,
>  };
> 
>  struct rte_vdpa_dev_info dev_info[] = {
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
  2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-05-12 13:34       ` Xia, Chenbo
  2022-05-13  8:16         ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:34 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example

Better be: examples/vdpa: add virtio blk support

> 
> Add virtio blk device support to vDPA example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  examples/vdpa/main.c             | 57 +++++++++++++++++++++++++++++++++++
>  examples/vdpa/vdpa_blk_compact.h | 65
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 122 insertions(+)
>  create mode 100644 examples/vdpa/vdpa_blk_compact.h
> 
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..2544141 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>  #include <cmdline_parse_string.h>
>  #include <cmdline_parse_num.h>
>  #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
> 
>  #define MAX_PATH_LEN 128
>  #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -159,8 +160,54 @@ struct vdpa_port {
>  };
> 
>  static int
> +vdpa_blk_device_set_features_and_protocol(const char *path)
> +{
> +	uint64_t protocol_features = 0;
> +	int ret;
> +
> +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_disable_features(path,
> +		VHOST_BLK_DISABLED_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_disable_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_get_protocol_features(path,
> &protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_get_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
> +
> +	ret = rte_vhost_driver_set_protocol_features(path,
> protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;

This goto is not needed.

> +	}
> +
> +out:
> +	return ret;
> +}
> +
> +static int
>  start_vdpa(struct vdpa_port *vport)
>  {
> +	uint32_t device_type = 0;
>  	int ret;
>  	char *socket_path = vport->ifname;
> 
> @@ -192,6 +239,16 @@ struct vdpa_port {
>  			"attach vdpa device failed: %s\n",
>  			socket_path);
> 
> +	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
> +	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
> +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");

The socket path should be added to this log message.

> +		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				"set vhost blk driver features and protocol
> features failed: %s\n",
> +				socket_path);
> +	}
> +
>  	if (rte_vhost_driver_start(socket_path) < 0)
>  		rte_exit(EXIT_FAILURE,
>  			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/vdpa_blk_compact.h
> b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..136c3f6
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>

The above two headers are not used in this file?

> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size
> */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments
> */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> config */
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#endif

It's already in rte_vhost.h, so no need to re-define.

Thanks,
Chenbo

> +
> +#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1))
> +
> +#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL <<
> VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX))
> +
> +#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
> \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  |
> \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
> +
> +/* Vhost-blk support protocol features */
> +#define VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-05-12 13:21       ` Xia, Chenbo
@ 2022-05-12 13:40         ` Xia, Chenbo
  2022-05-13  7:38           ` Pei, Andy
  0 siblings, 1 reply; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:40 UTC (permalink / raw)
  To: Xia, Chenbo, Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:22 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc
> driver
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> > Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> > Subject: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc
> driver
> >
> > Add get device type ops to ifc driver.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
> >  1 file changed, 10 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > index 8a260b7..99a6ab0 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
> >  	return 0;
> >  }
> >
> > +static int
> > +ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
> > +	uint32_t *type)
> > +{
> > +	RTE_SET_USED(vdev);
> > +	*type = VDPA_DEVICE_TYPE_BLK;
> > +	return 0;
> 
> This is not right. Remember net and blk are both using the driver?
> This will lead to using net also returns BLK.

A small correction to the above: this will lead to net not working. So implement
the callback for both (although in the API, a NULL callback can return the NET type).

Thanks,
Chenbo

> 
> And I suggest the patch-set validated with both blk and net...
> 
> Besides, ifcvf_blk_get_device_type should be ifcvf_get_device_type
> 
> Thanks,
> Chenbo
> 
> > +}
> > +
> >  static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> >  	.get_queue_num = ifcvf_get_queue_num,
> >  	.get_features = ifcvf_get_vdpa_features,
> > @@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
> >  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> >  	.get_notify_area = ifcvf_get_notify_area,
> >  	.get_config = ifcvf_blk_get_config,
> > +	.get_dev_type = ifcvf_blk_get_device_type,
> >  };
> >
> >  struct rte_vdpa_dev_info dev_info[] = {
> > --
> > 1.8.3.1


^ permalink raw reply	[flat|nested] 191+ messages in thread

* RE: [PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
  2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
@ 2022-05-12 13:44       ` Xia, Chenbo
  0 siblings, 0 replies; 191+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:44 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng

> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
> 
> Set_vring_state op is mandatory, add set_vring_state for blk device.
> Currently set_vring_state for blk device is not implemented.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 99a6ab0..ca49bc3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1290,6 +1290,16 @@ struct rte_vdpa_dev_info {
>  }
> 
>  static int
> +ifcvf_blk_set_vring_state(int vid, int vring, int state)
> +{
> +	RTE_SET_USED(vid);
> +	RTE_SET_USED(vring);
> +	RTE_SET_USED(state);
> +
> +	return 0;
> +}
> +
> +static int
>  ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
>  	uint64_t *features)
>  {
> @@ -1316,7 +1326,7 @@ struct rte_vdpa_dev_info {
>  	.get_protocol_features = ifcvf_blk_get_protocol_features,
>  	.dev_conf = ifcvf_dev_config,
>  	.dev_close = ifcvf_dev_close,
> -	.set_vring_state = NULL,
> +	.set_vring_state = ifcvf_blk_set_vring_state,

About naming in this driver, if you use two functions for net/blk
and name the one for blk as XXX_blk_XXX. Better rename the